Blog搬家,文字部分其实很好处理,麻烦的是图片。如果BSP下手狠一点直接删掉,而且本地又没有备份的话,那就全完了。
歪酷到目前为止相册都还没动,不知道是没检查到还是怎样,总之现在还能抓就是了。
因为Ruby没有好用的HTMLParser类,所以以下代码用的是JavaScript(准确地说是JScript,需要在Windows Script Host下用cscript运行)。脚本会扫描当前目录下的.xml导出文件,并输出一系列wget命令,可以重定向保存为批处理文件后执行。由于我不确定歪酷服务器上的图片文件名是否唯一,所以直接将URL处理后作为文件名保存。
/**
 * Scans an HTML fragment for <img> elements hosted on foto.ycstatic.com and
 * echoes one `wget` command line per image. The output is meant to be saved
 * as a batch file; the URL-encoded image URL is used as the local file name.
 *
 * @param {string} src HTML markup to scan.
 */
function parseHTML(src) {
    var doc = new ActiveXObject('htmlfile');
    doc.write(src);
    var images = doc.getElementsByTagName('img');
    for (var i = 0; i < images.length; i++) {
        var url = images[i].src;
        if (url.indexOf('foto.ycstatic.com') > 0) {
            // Quote the URL so characters such as '&' are not interpreted by
            // the command processor.
            var cmd = 'wget -O ' + encodeURIComponent(url) + ' "' + url + '"';
            // Inside a batch file every '%' has to be doubled. A string
            // pattern would replace only the first occurrence, so use a
            // global regular expression.
            cmd = cmd.replace(/%/g, '%%');
            WScript.echo(cmd);
        }
    }
}
/**
 * Loads an XML file and passes the text of every feed/entry/summary element
 * to parseHTML.
 *
 * @param {string} path Path of the XML file to read.
 */
function parseXML(path) {
    var xml = new ActiveXObject('MSXML2.DOMDocument.3.0');
    // Load synchronously; otherwise load() may return before the document
    // has been parsed and the query below would see an empty document.
    xml.async = false;
    if (!xml.load(path)) {
        // Unreadable or malformed file: nothing to extract.
        return;
    }
    var summaries = xml.getElementsByTagName('feed/entry/summary');
    for (var i = 0; i < summaries.length; i++) {
        var node = summaries[i].firstChild;
        // An empty <summary/> has no child node; skip it instead of failing.
        if (node && node.nodeValue != null) {
            parseHTML(node.nodeValue);
        }
    }
}
/**
 * Entry point: runs parseXML on every file in the current directory whose
 * name ends with ".xml".
 */
function main() {
    var fso = new ActiveXObject('Scripting.FileSystemObject');
    var files = new Enumerator(fso.GetFolder('.').Files);
    while (!files.atEnd()) {
        var path = String(files.item());
        if (path.slice(-4) == '.xml') {
            parseXML(path);
        }
        files.moveNext();
    }
}
main();
上传至Picasa的代码倒没什么可解释的,只需要留意一点:每个Picasa相册最多只能容纳500张图片。
require 'net/https'
require 'uri'
require 'rexml/document'
require 'fileutils'
# Logs in through Google ClientLogin for the 'lh2' service and returns the
# authorization line of the response.
#
# email  - account e-mail address, sent as the 'Email' form field.
# passwd - account password, sent as the 'Passwd' form field.
#
# Returns the whole "Auth=<token>" line (including the "Auth=" prefix) with
# surrounding whitespace removed.
# Raises RuntimeError when a successful response carries no "Auth=" line, and
# whatever Net::HTTPResponse#error! raises for a non-success response (the
# response body is printed first).
def getAuth(email, passwd)
  uri = URI.parse('https://www.google.com/accounts/ClientLogin')
  req = Net::HTTP::Post.new(uri.path)
  req.set_form_data({'Email'=>email, 'Passwd'=>passwd, 'service'=>'lh2'})
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  res = http.start { |h| h.request(req) }
  case res
  when Net::HTTPSuccess
    line = res.body.to_s.split("\n").find { |s| s.start_with?('Auth=') }
    # Without this check the method would hand back something that is not a
    # token and the failure would only surface later, during the upload.
    raise 'ClientLogin response did not contain an Auth= line' unless line
    # Drop a trailing "\r" so it cannot leak into a request header.
    line.strip
  else
    puts res.body
    res.error!
  end
end
# Uploads one image file together with its Atom metadata to a Picasa Web
# Albums album as a single multipart/related POST.
#
# auth     - authorization line returned by getAuth; sent verbatim after
#            "GoogleLogin " in the Authorization header.
# albumid  - id of the target album (interpolated into the feed URL).
# title    - photo title; XML-escaped before being placed in the entry.
# summary  - photo summary; XML-escaped before being placed in the entry.
# filename - path of the image file to upload (read in binary mode).
# ftype    - MIME type announced for the image part.
#
# On success the 'src' attribute of the response's entry/content element is
# yielded to the block (or returned when no block is given).
# Raises RuntimeError when the response has no such attribute, and whatever
# Net::HTTPResponse#error! raises for a non-success response (the response
# body is printed first).
def ul2Picasa(auth, albumid, title, summary, filename, ftype='image/jpeg')
  uri = URI.parse("http://picasaweb.google.com/data/feed/api/user/default/albumid/#{albumid}")
  boundary = 'END_OF_PART'
  crlf = "\r\n"

  # Escape the user-supplied text so characters such as '&' or '<' cannot
  # produce a malformed entry document.
  entry = [
    "<entry xmlns='http://www.w3.org/2005/Atom'>",
    "<title>#{REXML::Text.normalize(title)}</title>",
    "<summary>#{REXML::Text.normalize(summary)}</summary>",
    '<category scheme="http://schemas.google.com/g/2005#kind"',
    'term="http://schemas.google.com/photos/2007#photo"/>',
    '</entry>'
  ].join("\n")

  # Every part needs an empty line between its headers and its content.
  head = [
    'Media multipart posting',
    "--#{boundary}",
    'Content-Type: application/atom+xml',
    '',
    entry,
    "--#{boundary}",
    "Content-Type: #{ftype}",
    '',
    ''
  ].join(crlf)
  image = File.open(filename, 'rb') { |f| f.read }
  # Assemble as raw bytes: mixing a text string that contains non-ASCII
  # characters with binary image data would otherwise raise an encoding error.
  body = head.b + image.b + "#{crlf}--#{boundary}--#{crlf}".b

  req = Net::HTTP::Post.new(uri.path)
  req.body = body
  req.set_content_type("multipart/related; boundary=\"#{boundary}\"")
  req['MIME-version'] = '1.0'
  req['Authorization'] = "GoogleLogin #{auth}"
  # The header counts bytes, not characters.
  req['Content-Length'] = body.bytesize.to_s
  http = Net::HTTP.new(uri.host, uri.port)
  res = http.start { |h| h.request(req) }
  case res
  when Net::HTTPSuccess
    content = REXML::Document.new(res.body).get_elements('entry/content')[0]
    # Attributes#[] gives the unescaped value, so '&' in the URL stays '&'.
    src = content && content.attributes['src']
    raise 'upload response did not contain an entry/content src attribute' unless src
    block_given? ? yield(src) : src
  else
    puts res.body
    res.error!
  end
end
email = 'USERNAME@gmail.com'
passwd = 'USERPASSWORD'
# This value can be obtained by creating an album by hand and looking at the
# page source.
albumid = 'NNNNNNNNNNNNNNNNNN'
# Files are moved into this directory once they have been uploaded.
mvdir = '@up'
# The directory must exist: when the target is not a directory,
# FileUtils.move renames the file onto that path, so every uploaded image
# would overwrite the previous one.
FileUtils.mkdir_p(mvdir)
auth = getAuth(email, passwd)
# Keep a record (file name, TAB, URL with an /s800 path segment inserted
# before the last segment). Append mode keeps the entries of earlier runs, and
# the block form closes the log even when an upload raises.
# No per-album image count limit is enforced here.
File.open('piclog.txt', 'a') do |flog|
  Dir.glob('*.jpg') do |fn|
    ul2Picasa(auth, albumid, fn, '', fn) do |url|
      puts fn
      flog.puts fn + "\t" + url.sub(/(\/[^\/]+)$/, '/s800\1')
      FileUtils.move(fn, mvdir)
    end
  end
end