Fix MOBI parser

Update KindleUnpack. Discover new and exciting bugs.
2019-02-10 17:58:35 +05:30
parent f6f9d01060
commit 3cd75807f9
9 changed files with 96 additions and 76 deletions
--- a/lector/KindleUnpack/kindleunpack.py
+++ b/lector/KindleUnpack/kindleunpack.py
@@ -6,7 +6,7 @@ from __future__ import unicode_literals, division, absolute_import, print_functi

 import os

-__path__ = ["lib", os.path.dirname(__file__), "kindleunpack"]
+__path__ = ["lib", os.path.dirname(os.path.realpath(__file__)), "kindleunpack"]

 import sys
 import codecs
@@ -140,6 +140,8 @@ if PY2:
 #  0.76   pre-release version only fix name related issues in opf by not using original file name in mobi7
 #  0.77   bug fix for unpacking HDImages with included Fonts
 #  0.80   converted to work with both python 2.7 and Python 3.3 and later
+#  0.81   various fixes
+#  0.82   Handle calibre-generated mobis that can have skeletons with no fragments

 DUMP = False
 """ Set to True to dump all possible information. """
@@ -847,7 +849,7 @@ def process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, k8only=Fa
    return


-def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=True, dodump=False, dowriteraw=False, dosplitcombos=False):
+def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False, dodump=False, dowriteraw=False, dosplitcombos=False):
    global DUMP
    global WRITE_RAW_DATA
    global SPLIT_COMBO_MOBIS
@@ -949,7 +951,7 @@ def main(argv=unicode_argv()):
    global WRITE_RAW_DATA
    global SPLIT_COMBO_MOBIS

-    print("KindleUnpack v0.80")
+    print("KindleUnpack v0.82")
    print("   Based on initial mobipocket version Copyright © 2009 Charles M. Hannum <root@ihack.net>")
    print("   Extensive Extensions and Improvements Copyright © 2009-2014 ")
    print("       by:  P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding, tkeo.")
--- a/lector/KindleUnpack/mobi_k8proc.py
+++ b/lector/KindleUnpack/mobi_k8proc.py
@@ -180,9 +180,11 @@ class K8Processor:
        fragptr = 0
        baseptr = 0
        cnt = 0
+        filename = 'part%04d.xhtml' % cnt
        for [skelnum, skelname, fragcnt, skelpos, skellen] in self.skeltbl:
            baseptr = skelpos + skellen
            skeleton = text[skelpos: baseptr]
+            aidtext = "0"
            for i in range(fragcnt):
                [insertpos, idtext, filenum, seqnum, startpos, length] = self.fragtbl[fragptr]
                aidtext = idtext[12:-2]
--- a/lector/KindleUnpack/mobiml2xhtml.py
+++ b/lector/KindleUnpack/mobiml2xhtml.py
@@ -246,13 +246,13 @@ class MobiMLConverter(object):
                            # handle case of end tag with no beginning by injecting empty begin tag
                            taginfo = ('begin', tname, None)
                            htmlstr += self.processtag(taginfo)
-                            print("     - fixed by injecting empty start tag ", tname)
+                            print "     - fixed by injecting empty start tag ", tname
                            self.path.append(tname)
                        elif len(self.path) >  1 and tname == self.path[-2]:
                            # handle case of dangling missing end
                            taginfo = ('end', self.path[-1], None)
                            htmlstr += self.processtag(taginfo)
-                            print("     - fixed by injecting end tag ", self.path[-1])
+                            print "     - fixed by injecting end tag ", self.path[-1]
                            self.path.pop()
                    self.path.pop()

@@ -504,18 +504,18 @@ def main(argv=sys.argv):
        infile = argv[1]

    try:
-        print('Converting Mobi Markup Language to XHTML')
+        print 'Converting Mobi Markup Language to XHTML'
        mlc = MobiMLConverter(infile)
-        print('Processing ...')
+        print 'Processing ...'
        htmlstr, css, cssname = mlc.processml()
        outname = infile.rsplit('.',1)[0] + '_converted.html'
        file(outname, 'wb').write(htmlstr)
        file(cssname, 'wb').write(css)
-        print('Completed')
-        print('XHTML version of book can be found at: ', outname)
+        print 'Completed'
+        print 'XHTML version of book can be found at: ' + outname

-    except ValueError as e:
-        print("Error: %s" % e)
+    except ValueError, e:
+        print "Error: %s" % e
        return 1

    return 0