在跟我实在不熟的Python奋斗了若干时间后,我才悲哀地发现,Blogger现在对API导入添加了限制。每天通过API发布一定量的帖子后,再发就得输入验证码,我太阳。
还好,通过尝试发现,Blogger GUI中提供的导入功能可以用。不然的话,某人的小500张帖子,还不知道要弄到什么时候去。
用Javascript写这段代码,看起来就乱七八糟的。不过没办法,还是因为Ruby没有好用的HTMLParser。虽然在Ruby里也可以调用ActiveX COM,但是会有回车换行符混乱的问题,将就用吧。
// Converts Atom-style feed files (feed/entry elements) found in the current
// folder into a Blogger import feed, saved next to each input as
// "<input path>.txt".
//
// Written for Windows Script Host (JScript): it relies on ActiveXObject and
// Enumerator, so the syntax is deliberately kept ES3-compatible (var only).

// Fixed preamble of the generated feed (blog id, title, links, author).
var FEED_HEADER =
    "<?xml version='1.0' encoding='UTF-8'?>" +
    "<?xml-stylesheet href='http://www.blogger.com/styles/atom.css' type='text/css'?>" +
    "<feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'>" +
    "<id>tag:blogger.com,1999:blog-7617775710611731452.archive</id>" +
    "<updated>2009-02-08T18:10:51.127+08:00</updated>" +
    "<title type='text'>幺贰和叁</title>" +
    "<link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://otnth.blogspot.com/feeds/archive'/>" +
    "<link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7617775710611731452/archive'/>" +
    "<link rel='http://schemas.google.com/g/2005#post' type='application/atom+xml' href='http://www.blogger.com/feeds/7617775710611731452/archive'/>" +
    "<link rel='alternate' type='text/html' href='http://otnth.blogspot.com/'/>" +
    "<author><name>小八</name><email>noreply@blogger.com</email></author>" +
    "<generator version='7.00' uri='http://www.blogger.com'>Blogger</generator>";

/**
 * Escapes the five XML special characters so a value can be placed in
 * element text or in a quoted attribute.
 * @param {*} text Value to escape; null/undefined become the empty string.
 * @returns {string} The escaped text.
 */
function escapeXml(text) {
    if (text === null || text === undefined) {
        return '';
    }
    return String(text)
        .replace(/&/g, '&amp;')   // must run first so later entities survive
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}

/**
 * Wraps text in a CDATA section. An embedded "]]>" would terminate the
 * section early, so it is split across two adjacent sections.
 * @param {string} text Raw text.
 * @returns {string} One or more CDATA sections carrying the text.
 */
function wrapCData(text) {
    return '<![CDATA[' + String(text).split(']]>').join(']]]]><![CDATA[>') + ']]>';
}

/**
 * Returns the value of the first child node of the first descendant
 * element with the given tag name.
 * @param {Object} node DOM node exposing getElementsByTagName.
 * @param {string} tagName Tag to look up.
 * @returns {string} The value, or '' when the element is missing or empty.
 */
function childText(node, tagName) {
    var matches = node.getElementsByTagName(tagName);
    if (matches.length === 0) {
        return '';
    }
    var first = matches[0].firstChild;
    if (!first || first.nodeValue === null || first.nodeValue === undefined) {
        return '';
    }
    return String(first.nodeValue);
}

/**
 * Formats an MSXML parseError object into the script's error message.
 * @param {Object} oError Object with errorCode, line, linepos and reason.
 * @returns {string} Human-readable message.
 */
function describeParseError(oError) {
    return "An error occurred:\n错误代码: " + oError.errorCode + "\n" +
        "行数: " + oError.line + "\n" +
        "列数: " + oError.linepos + "\n" +
        "原因: " + oError.reason;
}

/**
 * Post-processes the HTML body of a post. Call it only when the content
 * actually needs rewriting; otherwise it can be skipped entirely.
 *
 * Every <img> whose src contains 'foto.ycstatic.com' is redirected through
 * the global lookup table `imgmap`, keyed by encodeURIComponent(original
 * src). `imgmap` is not defined in this script and must be supplied by the
 * caller; images without a mapping are left unchanged.
 * @param {string} src HTML fragment.
 * @returns {string} innerHTML of the parsed document body.
 */
function parseHTML(src) {
    var doc = new ActiveXObject('htmlfile');
    doc.write(src);
    var images = doc.getElementsByTagName('img');
    var hasMap = (typeof imgmap !== 'undefined') && imgmap !== null;
    for (var i = 0; i < images.length; i++) {
        var url = images[i].src;
        if (hasMap && url.indexOf('foto.ycstatic.com') !== -1) {
            var mapped = imgmap[encodeURIComponent(url)];
            // Never write "undefined" into src when no replacement is known.
            if (mapped !== undefined && mapped !== null) {
                images[i].src = mapped;
            }
        }
    }
    return doc.body.innerHTML;
}

/**
 * Builds the Blogger <entry> markup for one source entry node.
 * Text values are XML-escaped and the body is CDATA-wrapped, so special
 * characters in titles, category terms or content keep the feed well-formed.
 * @param {Object} entry Entry element (exposes getElementsByTagName).
 * @returns {string} Serialized <entry>...</entry> markup.
 */
function buildEntry(entry) {
    var out = '<entry><id>' + escapeXml(childText(entry, 'id')) + '</id>';
    out += '<published>' + escapeXml(childText(entry, 'published')) + '</published>';
    out += '<updated>' + escapeXml(childText(entry, 'updated')) + '</updated>';
    out += "<category scheme='http://schemas.google.com/g/2005#kind' term='http://schemas.google.com/blogger/2008/kind#post'/>";

    var cats = entry.getElementsByTagName('category');
    for (var j = 0; j < cats.length; j++) {
        var term = cats[j].getAttribute('term');
        // Skip categories in the Google Reader scheme and any category
        // without a term; the rest become Blogger labels.
        if (cats[j].getAttribute('scheme') !== 'http://www.google.com/reader/' &&
                term !== null && term !== undefined) {
            out += "<category scheme='http://www.blogger.com/atom/ns#' term='" + escapeXml(term) + "'/>";
        }
    }

    out += '<title type="text">' + escapeXml(childText(entry, 'title')) + '</title>';

    // The summary may end with a literal "..."; drop that suffix.
    var body = '<div class="oldpost_ycool">' +
        childText(entry, 'summary').replace(/\.{3}$/, '') + '</div>';
    // Optional hook: body = parseHTML(body);
    out += '<content type="html">' + wrapCData(body) + '</content>';
    out += "<author><name>小八</name></author>";
    out += "<thr:total>0</thr:total></entry>";
    return out;
}

/**
 * Reads the feed at `path`, converts every feed/entry into a Blogger import
 * entry and saves the resulting document to path + '.txt'.
 * @param {string} path Path of the source XML file.
 * @throws {Error} When the source cannot be loaded or the generated feed is
 *     not well-formed XML.
 */
function parseXML(path) {
    var xml = new ActiveXObject('MSXML2.DOMDocument.3.0');
    // Load synchronously so the document is complete before it is queried.
    xml.async = false;
    if (!xml.load(path)) {
        throw new Error(describeParseError(xml.parseError));
    }

    var ret = FEED_HEADER;
    var entries = xml.getElementsByTagName('feed/entry');
    for (var i = 0; i < entries.length; i++) {
        ret += buildEntry(entries[i]);
    }
    ret += '</feed>';

    // Re-parse the generated text to validate it before writing it out.
    var xn = new ActiveXObject('MSXML2.DOMDocument.3.0');
    xn.async = false;
    xn.loadXML(ret);
    if (xn.parseError.errorCode !== 0) {
        throw new Error(describeParseError(xn.parseError));
    }
    xn.save(path + '.txt');
}

/**
 * Converts every file in the current folder whose name ends in '.xml'.
 */
function main() {
    var fso = new ActiveXObject('Scripting.FileSystemObject');
    var folder = fso.GetFolder('.');
    var files = new Enumerator(folder.Files);
    for (; !files.atEnd(); files.moveNext()) {
        var filePath = String(files.item());
        if (filePath.substr(filePath.length - 4) === '.xml') {
            parseXML(filePath);
        }
    }
}

// Run only where ActiveXObject exists (Windows Script Host); elsewhere the
// helpers above can be loaded without side effects.
if (typeof ActiveXObject !== 'undefined') {
    main();
}
没有评论 :
发表评论