2009年11月19日星期四

eD2k Hash in Ruby

很久以来一直都以为eD2k Hash就是MD4(罪魁祸首),这几天才发现错得很离谱。

花了点时间查资料,写了个用以生成eD2k链接的模块,包括AICH部分。代码有个偷懒的地方,读取文件的缓存大小必须为PART_SIZE,如果要拿去用的话请留意。Ruby 1.8.6和1.8.7的File.size在Windows平台下有bug,当文件尺寸比较大时会返回负数,这一点也请留意。

部分代码移植自pyaich和Python的Base64模块,GPL许可证。

require 'openssl'

module OtNtH
  module Digest
    # fit for eD2k Hash
    PART_SIZE = 9_728_000
    BLOCK_SIZE = 184_320

    class ED2K
      def initialize
        @h = ''
        @l = 0
      end
      
      def update(part)
        @h += OpenSSL::Digest::MD4.new(part).digest
        @l = part.size
      end
      
      def hexdigest
        @h += OpenSSL::Digest::MD4.new('').digest if @l == PART_SIZE
        OpenSSL::Digest::MD4.new(@h).hexdigest
      end
      
      def name
        'ED2K'
      end
    end # class ED2K

    class AICH
      class HashTree
        attr_accessor :dataSize, :isLeft, :baseSize, :hash, :leftTree, :rightTree
        
        def initialize(dataSize, isLeftBranch, baseSize)
          @dataSize = dataSize
          @isLeft = isLeftBranch
          @baseSize = baseSize
          @hash = nil
          @leftTree = nil
          @rightTree = nil
        end
        
        def loadLowestLevelHashes(hashList)
          if @dataSize <= @baseSize then
            @hash = hashList.shift
            return true
          else
            extra = @dataSize % @baseSize != 0 ? 1 : 0
            numBlocks = @dataSize / @baseSize + extra
            extra = isLeft ? 1 : 0
            sizeLeft = ((numBlocks + extra)/2) * @baseSize
            sizeRight = @dataSize - sizeLeft
            leftBaseSize = sizeLeft <= PART_SIZE ? BLOCK_SIZE : PART_SIZE
            @leftTree = HashTree.new(sizeLeft, true, leftBaseSize)
            
            rightBaseSize = sizeRight <= PART_SIZE ? BLOCK_SIZE : PART_SIZE
            @rightTree = HashTree.new(sizeRight, false, rightBaseSize)
            return @leftTree.loadLowestLevelHashes(hashList) && @rightTree.loadLowestLevelHashes(hashList)
          end
        end
        
        def reCalculateHash
          if @leftTree and @rightTree then
            return false if (not @leftTree.reCalculateHash) or (not @rightTree.reCalculateHash)
            h = OpenSSL::Digest::SHA1.new
            h.update(@leftTree.hash)
            h.update(@rightTree.hash)
            @hash = h.digest
            return true
          else
            return true
          end
        end
      end
      
      def initialize
        @hash_list = []
        @data_size = 0
      end
      
      def update(part)
        psize = part.size
        @data_size += psize
        pos = 0
        while pos < psize
          @hash_list.push OpenSSL::Digest::SHA1.new(part[pos, BLOCK_SIZE]).digest
          pos += BLOCK_SIZE
        end
      end
      
      def hexdigest
        blockSize = @data_size <= PART_SIZE ? BLOCK_SIZE : PART_SIZE
        tree = HashTree.new(@data_size, true, blockSize)
        tree.loadLowestLevelHashes(@hash_list)
        tree.reCalculateHash
        return Base32.encode32(tree.hash)
      end
      
      def name
        'ED2K'
      end
    end # class AICH
  end # module Digest
  
  module Base32
    B32TAB = 'abcdefghijklmnopqrstuvwxyz23456789'
    module_function
    
    def encode32(str)
      parts = []
      quanta, leftover = str.size.divmod(5)
      if leftover != 0 then
        str += ("\0" * (5 - leftover))
        quanta += 1
      end
      quanta.times do |i|
        c1, c2, c3 = str[i*5..(i+1)*5].unpack('nnC')
        c2 += (c1 & 1) << 16
        c3 += (c2 & 3) << 8
        parts.push(
          B32TAB[c1 >> 11],
          B32TAB[(c1 >> 6) & 0x1f],
          B32TAB[(c1 >> 1) & 0x1f],
          B32TAB[c2 >> 12],
          B32TAB[(c2 >> 7) & 0x1f],
          B32TAB[(c2 >> 2) & 0x1f],
          B32TAB[c3 >> 5],
          B32TAB[c3 & 0x1f]
        )
      end
      encoded = parts.join('')
      case leftover
      when 1
        encoded[0..-6] + '====='
      when 2
        encoded[0..-4] + '===='
      when 3
        encoded[0..-3] + '==='
      when 4
        encoded[0..-1] + '='
      else
        encoded
      end
    end
  end # module Base32
end # module OtNtH

if __FILE__ == $0 then
  exit if not File.exists?(ARGV[0])
  a = OtNtH::Digest::AICH.new
  e = OtNtH::Digest::ED2K.new
  File.open(ARGV[0], 'rb') do |f|
    buf = ''
    while f.read(OtNtH::Digest::PART_SIZE, buf)
      a.update(buf)
      e.update(buf)
    end
  end
  puts "ed2k://|file|#{File.basename(ARGV[0])}|#{File.size(ARGV[0])}|#{e.hexdigest}|h=#{a.hexdigest}|/"
end