在跟我实在不熟的Python奋斗了若干时间后,我才悲哀地发现,Blogger现在对API导入添加了限制。每天通过API发布一定量的帖子后,再发就得输入验证码,我太阳。
还好,通过尝试发现,Blogger GUI中提供的导入功能可以用。不然的话,某人的小500张帖子,还不知道要弄到什么时候去。
用JavaScript写这段代码,看起来就乱七八糟的。不过没办法,还是因为Ruby没有好用的HTMLParser。虽然在Ruby里也可以调用ActiveX COM,但是会有回车换行符混乱的问题,将就用吧。
// Rewrites the parts of a post body that need changing before import.
// There is no need to call it when the HTML has nothing to fix.
//
// Parses `src` with the MSHTML "htmlfile" ActiveX document and, for every
// <img> whose URL mentions foto.ycstatic.com, swaps the URL for the one
// recorded in `imgmap` under the key encodeURIComponent(originalUrl).
// Images without an entry in `imgmap` keep their original URL.
//
// NOTE(review): `imgmap` is not defined in the visible part of this file; it
// is assumed to be a global object mapping encoded old URLs to new URLs.
//
// @param {string} src  HTML fragment of one post.
// @returns {string}    innerHTML of the parsed document body after rewriting.
function parseHTML(src) {
  var doc = new ActiveXObject('htmlfile');
  doc.write(src);
  // Close the output stream so the document is completely parsed before it
  // is queried.
  doc.close();

  var imgs = doc.getElementsByTagName('img');
  for (var i = 0; i < imgs.length; i++) {
    var url = imgs[i].src;
    if (url.indexOf('foto.ycstatic.com') === -1) {
      continue;
    }
    var mapped = imgmap[encodeURIComponent(url)];
    // Only rewrite when a replacement is known; assigning undefined would
    // replace a working image URL with the text "undefined".
    if (typeof mapped !== 'undefined' && mapped !== null) {
      imgs[i].src = mapped;
    }
  }
  return doc.body.innerHTML;
}
/**
 * Converts one Atom feed file into a feed file for Blogger's import feature.
 *
 * Loads the XML at `path`, and for every feed/entry copies its id, published,
 * updated and title text, its categories (except those whose scheme is
 * 'http://www.google.com/reader/'), and its summary (a trailing "..." is
 * removed and the text is wrapped in <div class="oldpost_ycool">) into a new
 * feed that starts with a fixed Blogger archive header. The generated XML is
 * parsed again as a validity check and then saved to `path + '.txt'`.
 *
 * @param {string} path  Path of the source Atom XML file.
 * @throws {Error} When the source file or the generated feed fails to parse.
 */
function parseXML(path) {
  // Escapes the XML special characters so the text is safe both as element
  // content and inside a single- or double-quoted attribute value.
  function escapeXML(text) {
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  // Returns the value of the first child node of the first `tagName` element
  // under `parent`, or '' when the element is missing or has no such value.
  function firstText(parent, tagName) {
    var nodes = parent.getElementsByTagName(tagName);
    if (nodes.length === 0) {
      return '';
    }
    var child = nodes[0].firstChild;
    if (!child || child.nodeValue == null) {
      return '';
    }
    return String(child.nodeValue);
  }

  // Throws a descriptive Error when `doc` recorded a parse error.
  function throwIfParseError(doc) {
    var oError = doc.parseError;
    if (oError.errorCode !== 0) {
      throw new Error("An error occurred:\n错误代码: "
        + oError.errorCode + "\n"
        + "行数: " + oError.line + "\n"
        + "列数: " + oError.linepos + "\n"
        + "原因: " + oError.reason);
    }
  }

  var xml = new ActiveXObject('MSXML2.DOMDocument.3.0');
  // Load synchronously; otherwise load() may return before the document is
  // available and the entry list below would be empty or incomplete.
  xml.async = false;
  xml.load(path);
  throwIfParseError(xml);

  var parts = ["<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='http://www.blogger.com/styles/atom.css' type='text/css'?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-7617775710611731452.archive</id><updated>2009-02-08T18:10:51.127+08:00</updated><title type='text'>幺贰和叁</title><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://otnth.blogspot.com/feeds/archive'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7617775710611731452/archive'/><link rel='http://schemas.google.com/g/2005#post' type='application/atom+xml' href='http://www.blogger.com/feeds/7617775710611731452/archive'/><link rel='alternate' type='text/html' href='http://otnth.blogspot.com/'/><author><name>小八</name><email>noreply@blogger.com</email></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator>"];

  var entries = xml.getElementsByTagName('feed/entry');
  for (var i = 0; i < entries.length; i++) {
    var entry = entries[i];
    parts.push('<entry><id>' + escapeXML(firstText(entry, 'id')) + '</id>');
    parts.push('<published>' + escapeXML(firstText(entry, 'published')) + '</published>');
    parts.push('<updated>' + escapeXML(firstText(entry, 'updated')) + '</updated>');
    // Marks the entry as a blog post.
    parts.push("<category scheme='http://schemas.google.com/g/2005#kind' term='http://schemas.google.com/blogger/2008/kind#post'/>");

    var cats = entry.getElementsByTagName('category');
    for (var j = 0; j < cats.length; j++) {
      // Categories in the Google Reader scheme are not carried over.
      if (cats[j].getAttribute('scheme') === 'http://www.google.com/reader/') {
        continue;
      }
      var term = cats[j].getAttribute('term');
      if (term == null) {
        // A category without a term has nothing to become a label.
        continue;
      }
      parts.push("<category scheme='http://www.blogger.com/atom/ns#' term='" + escapeXML(term) + "'/>");
    }

    parts.push('<title type="text">' + escapeXML(firstText(entry, 'title')) + '</title>');

    // The summary loses a trailing "..." before it is wrapped.
    var body = '<div class="oldpost_ycool">' + firstText(entry, 'summary').replace(/\.{3}$/, '') + '</div>';
    // body = parseHTML(body); // enable when the post HTML itself needs rewriting
    // A literal "]]>" would end the CDATA section early, so split it across
    // two adjacent CDATA sections.
    body = body.replace(/\]\]>/g, ']]]]><![CDATA[>');
    parts.push('<content type="html"><![CDATA[' + body + ']]></content>');
    parts.push("<author><name>小八</name></author>");
    parts.push("<thr:total>0</thr:total></entry>");
  }
  parts.push('</feed>');

  // Parse the generated text once more so malformed output is reported
  // instead of being written to disk.
  var xn = new ActiveXObject('MSXML2.DOMDocument.3.0');
  xn.async = false;
  xn.loadXML(parts.join(''));
  throwIfParseError(xn);
  xn.save(path + '.txt');
}
/**
 * Walks the files of the current folder ('.') and hands every file whose
 * path ends in ".xml" to parseXML.
 */
function main() {
  var fileSystem = new ActiveXObject('Scripting.FileSystemObject');
  var files = new Enumerator(fileSystem.GetFolder('.').Files);
  while (!files.atEnd()) {
    // String() turns the File object into its path text.
    var filePath = String(files.item());
    var extension = filePath.substr(filePath.length - 4);
    if (extension === '.xml') {
      parseXML(filePath);
    }
    files.moveNext();
  }
}
// Script entry point: convert the .xml files of the current folder as soon as
// the script is run.
main();
没有评论 :
发表评论