Fix MOBI parser

Update KindleUnpack
Discover new and exciting bugs
This commit is contained in:
BasioMeusPuga
2019-02-10 17:58:35 +05:30
parent f6f9d01060
commit 3cd75807f9
9 changed files with 96 additions and 76 deletions

View File

@@ -6,7 +6,7 @@ from __future__ import unicode_literals, division, absolute_import, print_functi
import os
__path__ = ["lib", os.path.dirname(__file__), "kindleunpack"]
__path__ = ["lib", os.path.dirname(os.path.realpath(__file__)), "kindleunpack"]
import sys
import codecs
@@ -140,6 +140,8 @@ if PY2:
# 0.76 pre-release version only: fix name-related issues in opf by not using the original file name in mobi7
# 0.77 bug fix for unpacking HDImages with included Fonts
# 0.80 converted to work with both Python 2.7 and Python 3.3 and later
# 0.81 various fixes
# 0.82 Handle calibre-generated mobis that can have skeletons with no fragments
DUMP = False
""" Set to True to dump all possible information. """
@@ -847,7 +849,7 @@ def process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, k8only=Fa
return
def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=True, dodump=False, dowriteraw=False, dosplitcombos=False):
def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False, dodump=False, dowriteraw=False, dosplitcombos=False):
global DUMP
global WRITE_RAW_DATA
global SPLIT_COMBO_MOBIS
@@ -949,7 +951,7 @@ def main(argv=unicode_argv()):
global WRITE_RAW_DATA
global SPLIT_COMBO_MOBIS
print("KindleUnpack v0.80")
print("KindleUnpack v0.82")
print(" Based on initial mobipocket version Copyright © 2009 Charles M. Hannum <root@ihack.net>")
print(" Extensive Extensions and Improvements Copyright © 2009-2014 ")
print(" by: P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding, tkeo.")

View File

@@ -180,9 +180,11 @@ class K8Processor:
fragptr = 0
baseptr = 0
cnt = 0
filename = 'part%04d.xhtml' % cnt
for [skelnum, skelname, fragcnt, skelpos, skellen] in self.skeltbl:
baseptr = skelpos + skellen
skeleton = text[skelpos: baseptr]
aidtext = "0"
for i in range(fragcnt):
[insertpos, idtext, filenum, seqnum, startpos, length] = self.fragtbl[fragptr]
aidtext = idtext[12:-2]

View File

@@ -246,13 +246,13 @@ class MobiMLConverter(object):
# handle case of end tag with no beginning by injecting empty begin tag
taginfo = ('begin', tname, None)
htmlstr += self.processtag(taginfo)
print(" - fixed by injecting empty start tag ", tname)
print " - fixed by injecting empty start tag ", tname
self.path.append(tname)
elif len(self.path) > 1 and tname == self.path[-2]:
# handle case of dangling missing end
taginfo = ('end', self.path[-1], None)
htmlstr += self.processtag(taginfo)
print(" - fixed by injecting end tag ", self.path[-1])
print " - fixed by injecting end tag ", self.path[-1]
self.path.pop()
self.path.pop()
@@ -504,18 +504,18 @@ def main(argv=sys.argv):
infile = argv[1]
try:
print('Converting Mobi Markup Language to XHTML')
print 'Converting Mobi Markup Language to XHTML'
mlc = MobiMLConverter(infile)
print('Processing ...')
print 'Processing ...'
htmlstr, css, cssname = mlc.processml()
outname = infile.rsplit('.',1)[0] + '_converted.html'
file(outname, 'wb').write(htmlstr)
file(cssname, 'wb').write(css)
print('Completed')
print('XHTML version of book can be found at: ', outname)
print 'Completed'
print 'XHTML version of book can be found at: ' + outname
except ValueError as e:
print("Error: %s" % e)
except ValueError, e:
print "Error: %s" % e
return 1
return 0