132 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			132 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | |
| # -*- coding: utf-8 -*-
 | |
| # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
 | |
| 
 | |
| from __future__ import unicode_literals, division, absolute_import, print_function
 | |
| 
 | |
| from .compatibility_utils import PY2, bchr, lmap, bstr
 | |
| 
 | |
| if PY2:
 | |
|     range = xrange
 | |
| 
 | |
| import struct
 | |
| # note:  struct pack, unpack, unpack_from all require bytestring format
 | |
| # data all the way up to at least python 2.7.5, python 3 okay with bytestring
 | |
| 
 | |
| 
 | |
class unpackException(Exception):
    """Error raised by the readers in this module for data they cannot unpack."""
 | |
| 
 | |
class UncompressedReader:
    """Reader for data that is stored without compression."""

    def unpack(self, data):
        """Return ``data`` exactly as it was passed in (same object, no copy)."""
        return data
 | |
| 
 | |
class PalmdocReader:
    """Decoder for the PalmDoc byte-oriented compression scheme."""

    def unpack(self, i):
        """Decompress the byte string ``i`` and return the result as bytes.

        Each control byte ``c`` of the input is interpreted as follows:

        * ``1 <= c <= 8``  -- copy the next ``c`` input bytes verbatim.
        * ``c < 128`` (including 0) -- emit ``c`` itself.
        * ``c >= 192``     -- emit a space followed by ``c ^ 128``.
        * otherwise (128..191) -- ``c`` and the next input byte form a 16-bit
          value holding an 11-bit backwards distance ``m`` and a 3-bit length
          (``n = length + 3``); ``n`` bytes are copied from ``m`` bytes back
          in the output produced so far.

        Truncated input is tolerated: a literal run or back-reference that
        runs past the end of ``i`` simply contributes what is available.
        """
        # bytearray yields ints on indexing under both python 2 and python 3
        # and supports cheap in-place appends, so the output is no longer
        # rebuilt on every single write.
        src = bytearray(i)
        end = len(src)
        out = bytearray()
        p = 0
        while p < end:
            c = src[p]
            p += 1
            if 1 <= c <= 8:
                # run of c literal bytes
                out += src[p:p + c]
                p += c
            elif c < 128:
                # single literal byte
                out.append(c)
            elif c >= 192:
                # space + character pair squeezed into a single byte
                out.append(0x20)
                out.append(c ^ 128)
            elif p < end:
                # two byte back-reference: distance m, length n
                c = (c << 8) | src[p]
                p += 1
                m = (c >> 3) & 0x07ff
                n = (c & 7) + 3
                if m > n:
                    # source and destination cannot overlap: copy in one go
                    out += out[-m:n - m]
                else:
                    # overlapping copy: go byte by byte so that bytes written
                    # by this very copy can be picked up again
                    for _ in range(n):
                        if m == 1:
                            # out[-1:0] would be empty, so slice to the end
                            out += out[-m:]
                        else:
                            out += out[-m:-m + 1]
        return bytes(out)
 | |
| 
 | |
class HuffcdicReader:
    """Decoder for HUFF/CDIC compressed data.

    Usage order, as required by the attributes each method reads:
    ``loadHuff`` once (builds the code tables and resets the phrase
    dictionary), then ``loadCdic`` for each CDIC record (appends phrases),
    then ``unpack`` for each compressed chunk.
    """

    # reads one big-endian unsigned 64-bit value from a buffer at an offset
    q = struct.Struct(b'>Q').unpack_from

    def loadHuff(self, huff):
        """Parse a HUFF record and build the decoding tables.

        Sets ``self.dict1`` (256 entries of ``(codelen, term, maxcode)``
        indexed by the top 8 bits of a code), ``self.mincode`` and
        ``self.maxcode`` (33-entry tuples indexed by code length, values
        left-aligned to 32 bits) and resets ``self.dictionary`` to empty.

        Raises:
            unpackException: the record does not start with the HUFF header.
            AssertionError: a table entry has a zero code length, or a code
                length of at most 8 bits without the terminal flag.
            struct.error: the record is too short for the tables.
        """
        if huff[0:8] != b'HUFF\x00\x00\x00\x18':
            raise unpackException('invalid huff header')
        off1, off2 = struct.unpack_from(b'>LL', huff, 8)

        def dict1_unpack(v):
            codelen, term, maxcode = v & 0x1f, v & 0x80, v >> 8
            # Raised explicitly (not via the assert statement) so that the
            # checks survive "python -O"; a zero code length would otherwise
            # never advance the decode loop in unpack().  The exception type
            # is kept as AssertionError for existing callers.
            if codelen == 0:
                raise AssertionError('huff table entry has zero code length')
            if codelen <= 8 and not term:
                raise AssertionError('short huff code without terminal flag')
            # left-align the largest code of this length to 32 bits
            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
            return (codelen, term, maxcode)

        self.dict1 = [dict1_unpack(v)
                      for v in struct.unpack_from(b'>256L', huff, off1)]

        # dict2 holds (mincode, maxcode) pairs for code lengths 1..32; a dummy
        # entry is prepended so the tuples can be indexed by code length
        dict2 = struct.unpack_from(b'>64L', huff, off2)
        self.mincode = tuple(
            mincode << (32 - codelen)
            for codelen, mincode in enumerate((0,) + dict2[0::2]))
        self.maxcode = tuple(
            ((maxcode + 1) << (32 - codelen)) - 1
            for codelen, maxcode in enumerate((0,) + dict2[1::2]))

        self.dictionary = []

    def loadCdic(self, cdic):
        """Parse a CDIC record and append its phrases to ``self.dictionary``.

        Each appended entry is ``(phrase_bytes, flag)``; a zero flag marks a
        phrase that is itself compressed and is expanded lazily by ``unpack``.

        Raises:
            unpackException: the record does not start with the CDIC header.
            struct.error: the record is too short (or the phrase count in the
                header is inconsistent with the phrases already loaded).
        """
        if cdic[0:8] != b'CDIC\x00\x00\x00\x10':
            raise unpackException('invalid cdic header')
        phrases, bits = struct.unpack_from(b'>LL', cdic, 8)
        # a record holds at most 1 << bits phrases; the last one may hold fewer
        n = min(1 << bits, phrases - len(self.dictionary))
        h = struct.Struct(b'>H').unpack_from

        def getslice(off):
            # offsets are relative to the end of the 16 byte header
            blen, = h(cdic, 16 + off)
            phrase = cdic[18 + off:18 + off + (blen & 0x7fff)]
            return (phrase, blen & 0x8000)

        # struct formats are passed as byte strings (see note at top of file)
        offsets = struct.unpack_from(('>%dH' % n).encode('ascii'), cdic, 16)
        self.dictionary += [getslice(off) for off in offsets]

    def unpack(self, data):
        """Decompress one chunk of Huffman coded ``data`` and return bytes.

        Requires ``loadHuff`` and ``loadCdic`` to have been called.  Phrases
        stored compressed in the dictionary are expanded recursively on first
        use and the expanded form is cached back into ``self.dictionary``.
        """
        q = HuffcdicReader.q

        bitsleft = len(data) * 8
        # pad so a 64-bit read is always possible near the end of the input;
        # build a new object instead of "+=" so a caller-owned bytearray is
        # never modified in place
        padded = data + b"\x00\x00\x00\x00\x00\x00\x00\x00"
        pos = 0
        x, = q(padded, pos)
        n = 32  # shift that brings the next 32 unread bits to the bottom of x

        parts = []
        while True:
            if n <= 0:
                # at least 32 bits consumed: slide the 64-bit window 4 bytes
                pos += 4
                x, = q(padded, pos)
                n += 32
            code = (x >> n) & ((1 << 32) - 1)

            # fast path: the top 8 bits identify short (terminal) codes
            codelen, term, maxcode = self.dict1[code >> 24]
            if not term:
                # longer code: find its length via the per-length minimums
                while code < self.mincode[codelen]:
                    codelen += 1
                maxcode = self.maxcode[codelen]

            n -= codelen
            bitsleft -= codelen
            if bitsleft < 0:
                # the code just read extends into the padding: input is done
                break

            r = (maxcode - code) >> (32 - codelen)
            phrase, flag = self.dictionary[r]
            if not flag:
                # The entry is None while it is being expanded, so a phrase
                # that refers back to itself fails on the tuple unpacking
                # above instead of recursing without end.
                self.dictionary[r] = None
                phrase = self.unpack(phrase)
                self.dictionary[r] = (phrase, 1)
            parts.append(phrase)
        return b''.join(parts)
 |