Whoops
This commit is contained in:
		| @@ -1,525 +0,0 @@ | |||||||
| #! /usr/bin/python |  | ||||||
| # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # this program works in concert with the output from KindleUnpack |  | ||||||
|  |  | ||||||
| ''' |  | ||||||
| Convert from Mobi ML to XHTML |  | ||||||
| ''' |  | ||||||
|  |  | ||||||
| import os |  | ||||||
| import sys |  | ||||||
| import re |  | ||||||
|  |  | ||||||
| SPECIAL_HANDLING_TAGS = { |  | ||||||
|     '?xml'     : ('xmlheader', -1), |  | ||||||
|     '!--'      : ('comment', -3), |  | ||||||
|     '!DOCTYPE' : ('doctype', -1), |  | ||||||
| } |  | ||||||
|  |  | ||||||
| SPECIAL_HANDLING_TYPES = ['xmlheader', 'doctype', 'comment'] |  | ||||||
|  |  | ||||||
| SELF_CLOSING_TAGS = ['br' , 'hr', 'input', 'img', 'image', 'meta', 'spacer', 'link', 'frame', 'base', 'col', 'reference'] |  | ||||||
|  |  | ||||||
| class MobiMLConverter(object): |  | ||||||
|  |  | ||||||
|     PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE) |  | ||||||
|     IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex') |  | ||||||
|  |  | ||||||
|     def __init__(self, filename): |  | ||||||
|         self.base_css_rules =  'blockquote { margin: 0em 0em 0em 1.25em }\n' |  | ||||||
|         self.base_css_rules += 'p { margin: 0em }\n' |  | ||||||
|         self.base_css_rules += '.bold { font-weight: bold }\n' |  | ||||||
|         self.base_css_rules += '.italic { font-style: italic }\n' |  | ||||||
|         self.base_css_rules += '.mbp_pagebreak { page-break-after: always; margin: 0; display: block }\n' |  | ||||||
|         self.tag_css_rules = {} |  | ||||||
|         self.tag_css_rule_cnt = 0 |  | ||||||
|         self.path = [] |  | ||||||
|         self.filename = filename |  | ||||||
|         self.wipml = open(self.filename, 'rb').read() |  | ||||||
|         self.pos = 0 |  | ||||||
|         self.opfname = self.filename.rsplit('.',1)[0] + '.opf' |  | ||||||
|         self.opos = 0 |  | ||||||
|         self.meta = '' |  | ||||||
|         self.cssname = os.path.join(os.path.dirname(self.filename),'styles.css') |  | ||||||
|         self.current_font_size = 3 |  | ||||||
|         self.font_history = [] |  | ||||||
|  |  | ||||||
|     def cleanup_html(self): |  | ||||||
|         self.wipml = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.wipml) |  | ||||||
|         self.wipml = self.wipml.replace('\r\n', '\n') |  | ||||||
|         self.wipml = self.wipml.replace('> <', '>\n<') |  | ||||||
|         self.wipml = self.wipml.replace('<mbp: ', '<mbp:') |  | ||||||
|         # self.wipml = re.sub(r'<?xml[^>]*>', '', self.wipml) |  | ||||||
|         self.wipml = self.wipml.replace('<br></br>','<br/>') |  | ||||||
|  |  | ||||||
|     def replace_page_breaks(self): |  | ||||||
|         self.wipml = self.PAGE_BREAK_PAT.sub( |  | ||||||
|             '<div class="mbp_pagebreak" />', |  | ||||||
|             self.wipml) |  | ||||||
|  |  | ||||||
|     # parse leading text of ml and tag |  | ||||||
|     def parseml(self): |  | ||||||
|         p = self.pos |  | ||||||
|         if p >= len(self.wipml): |  | ||||||
|             return None |  | ||||||
|         if self.wipml[p] != '<': |  | ||||||
|             res = self.wipml.find('<',p) |  | ||||||
|             if res == -1 : |  | ||||||
|                 res = len(self.wipml) |  | ||||||
|             self.pos = res |  | ||||||
|             return self.wipml[p:res], None |  | ||||||
|         # handle comment as a special case to deal with multi-line comments |  | ||||||
|         if self.wipml[p:p+4] == '<!--': |  | ||||||
|             te = self.wipml.find('-->',p+1) |  | ||||||
|             if te != -1: |  | ||||||
|                 te = te+2 |  | ||||||
|         else : |  | ||||||
|             te = self.wipml.find('>',p+1) |  | ||||||
|             ntb = self.wipml.find('<',p+1) |  | ||||||
|             if ntb != -1 and ntb < te: |  | ||||||
|                 self.pos = ntb |  | ||||||
|                 return self.wipml[p:ntb], None |  | ||||||
|         self.pos = te + 1 |  | ||||||
|         return None, self.wipml[p:te+1] |  | ||||||
|  |  | ||||||
|     # parses string version of tag to identify its name, |  | ||||||
|     # its type 'begin', 'end' or 'single', |  | ||||||
|     # plus build a hashtable of its attributes |  | ||||||
|     # code is written to handle the possiblity of very poor formating |  | ||||||
|     def parsetag(self, s): |  | ||||||
|         p = 1 |  | ||||||
|         # get the tag name |  | ||||||
|         tname = None |  | ||||||
|         ttype = None |  | ||||||
|         tattr = {} |  | ||||||
|         while s[p:p+1] == ' ' : |  | ||||||
|             p += 1 |  | ||||||
|         if s[p:p+1] == '/': |  | ||||||
|             ttype = 'end' |  | ||||||
|             p += 1 |  | ||||||
|             while s[p:p+1] == ' ' : |  | ||||||
|                 p += 1 |  | ||||||
|         b = p |  | ||||||
|         while s[p:p+1] not in ('>', '/', ' ', '"', "'", "\r", "\n") : |  | ||||||
|             p += 1 |  | ||||||
|         tname=s[b:p].lower() |  | ||||||
|         if tname == '!doctype': |  | ||||||
|             tname = '!DOCTYPE' |  | ||||||
|         # special cases |  | ||||||
|         if tname in SPECIAL_HANDLING_TAGS.keys(): |  | ||||||
|             ttype, backstep = SPECIAL_HANDLING_TAGS[tname] |  | ||||||
|             tattr['special'] = s[p:backstep] |  | ||||||
|         if ttype is None: |  | ||||||
|             # parse any attributes |  | ||||||
|             while s.find('=',p) != -1 : |  | ||||||
|                 while s[p:p+1] == ' ' : |  | ||||||
|                     p += 1 |  | ||||||
|                 b = p |  | ||||||
|                 while s[p:p+1] != '=' : |  | ||||||
|                     p += 1 |  | ||||||
|                 aname = s[b:p].lower() |  | ||||||
|                 aname = aname.rstrip(' ') |  | ||||||
|                 p += 1 |  | ||||||
|                 while s[p:p+1] == ' ' : |  | ||||||
|                     p += 1 |  | ||||||
|                 if s[p:p+1] in ('"', "'") : |  | ||||||
|                     p = p + 1 |  | ||||||
|                     b = p |  | ||||||
|                     while s[p:p+1] not in ('"', "'") : |  | ||||||
|                         p += 1 |  | ||||||
|                     val = s[b:p] |  | ||||||
|                     p += 1 |  | ||||||
|                 else : |  | ||||||
|                     b = p |  | ||||||
|                     while s[p:p+1] not in ('>', '/', ' ') : |  | ||||||
|                         p += 1 |  | ||||||
|                     val = s[b:p] |  | ||||||
|                 tattr[aname] = val |  | ||||||
|         # label beginning and single tags |  | ||||||
|         if ttype is None: |  | ||||||
|             ttype = 'begin' |  | ||||||
|             if s.find(' /',p) >= 0: |  | ||||||
|                 ttype = 'single_ext' |  | ||||||
|             elif s.find('/',p) >= 0: |  | ||||||
|                 ttype = 'single' |  | ||||||
|         return ttype, tname, tattr |  | ||||||
|  |  | ||||||
|     # main routine to convert from mobi markup language to html |  | ||||||
|     def processml(self): |  | ||||||
|  |  | ||||||
|         # are these really needed |  | ||||||
|         html_done = False |  | ||||||
|         head_done = False |  | ||||||
|         body_done = False |  | ||||||
|  |  | ||||||
|         skip = False |  | ||||||
|  |  | ||||||
|         htmlstr = '' |  | ||||||
|         self.replace_page_breaks() |  | ||||||
|         self.cleanup_html() |  | ||||||
|  |  | ||||||
|         # now parse the cleaned up ml into standard xhtml |  | ||||||
|         while True: |  | ||||||
|  |  | ||||||
|             r = self.parseml() |  | ||||||
|             if not r: |  | ||||||
|                 break |  | ||||||
|  |  | ||||||
|             text, tag = r |  | ||||||
|  |  | ||||||
|             if text: |  | ||||||
|                 if not skip: |  | ||||||
|                     htmlstr += text |  | ||||||
|  |  | ||||||
|             if tag: |  | ||||||
|                 ttype, tname, tattr = self.parsetag(tag) |  | ||||||
|  |  | ||||||
|                 # If we run into a DTD or xml declarations inside the body ... bail. |  | ||||||
|                 if tname in SPECIAL_HANDLING_TAGS.keys() and tname != 'comment' and body_done: |  | ||||||
|                     htmlstr += '\n</body></html>' |  | ||||||
|                     break |  | ||||||
|  |  | ||||||
|                 # make sure self-closing tags actually self-close |  | ||||||
|                 if ttype == 'begin' and tname in SELF_CLOSING_TAGS: |  | ||||||
|                     ttype = 'single' |  | ||||||
|  |  | ||||||
|                 # make sure any end tags of self-closing tags are discarded |  | ||||||
|                 if ttype == 'end' and tname in SELF_CLOSING_TAGS: |  | ||||||
|                     continue |  | ||||||
|  |  | ||||||
|                 # remove embedded guide and refernces from old mobis |  | ||||||
|                 if tname in ('guide', 'ncx', 'reference') and ttype in ('begin', 'single', 'single_ext'): |  | ||||||
|                     tname = 'removeme:{0}'.format(tname) |  | ||||||
|                     tattr = None |  | ||||||
|                 if tname in ('guide', 'ncx', 'reference', 'font', 'span') and ttype == 'end': |  | ||||||
|                     if self.path[-1] == 'removeme:{0}'.format(tname): |  | ||||||
|                         tname = 'removeme:{0}'.format(tname) |  | ||||||
|                         tattr = None |  | ||||||
|  |  | ||||||
|                 # Get rid of font tags that only have a color attribute. |  | ||||||
|                 if tname == 'font' and ttype in ('begin', 'single', 'single_ext'): |  | ||||||
|                     if 'color' in tattr.keys() and len(tattr.keys()) == 1: |  | ||||||
|                         tname = 'removeme:{0}'.format(tname) |  | ||||||
|                         tattr = None |  | ||||||
|  |  | ||||||
|                 # Get rid of empty spans in the markup. |  | ||||||
|                 if tname == 'span' and ttype in ('begin', 'single', 'single_ext') and not len(tattr): |  | ||||||
|                     tname = 'removeme:{0}'.format(tname) |  | ||||||
|  |  | ||||||
|                 # need to handle fonts outside of the normal methods |  | ||||||
|                 # so fonts tags won't be added to the self.path since we keep track |  | ||||||
|                 # of font tags separately with self.font_history |  | ||||||
|                 if tname == 'font' and ttype == 'begin': |  | ||||||
|                     # check for nested font start tags |  | ||||||
|                     if len(self.font_history) > 0 : |  | ||||||
|                         # inject a font end tag |  | ||||||
|                         taginfo = ('end', 'font', None) |  | ||||||
|                         htmlstr += self.processtag(taginfo) |  | ||||||
|                     self.font_history.append((ttype, tname, tattr)) |  | ||||||
|                     # handle the current font start tag |  | ||||||
|                     taginfo = (ttype, tname, tattr) |  | ||||||
|                     htmlstr += self.processtag(taginfo) |  | ||||||
|                     continue |  | ||||||
|  |  | ||||||
|                 # check for nested font tags and unnest them |  | ||||||
|                 if tname == 'font' and ttype == 'end': |  | ||||||
|                     self.font_history.pop() |  | ||||||
|                     # handle this font end tag |  | ||||||
|                     taginfo = ('end', 'font', None) |  | ||||||
|                     htmlstr += self.processtag(taginfo) |  | ||||||
|                     # check if we were nested |  | ||||||
|                     if len(self.font_history) > 0: |  | ||||||
|                         # inject a copy of the most recent font start tag from history |  | ||||||
|                         taginfo = self.font_history[-1] |  | ||||||
|                         htmlstr += self.processtag(taginfo) |  | ||||||
|                     continue |  | ||||||
|  |  | ||||||
|                 # keep track of nesting path |  | ||||||
|                 if ttype == 'begin': |  | ||||||
|                     self.path.append(tname) |  | ||||||
|                 elif ttype == 'end': |  | ||||||
|                     if tname != self.path[-1]: |  | ||||||
|                         print ('improper nesting: ', self.path, tname, ttype) |  | ||||||
|                         if tname not in self.path: |  | ||||||
|                             # handle case of end tag with no beginning by injecting empty begin tag |  | ||||||
|                             taginfo = ('begin', tname, None) |  | ||||||
|                             htmlstr += self.processtag(taginfo) |  | ||||||
|                             print "     - fixed by injecting empty start tag ", tname |  | ||||||
|                             self.path.append(tname) |  | ||||||
|                         elif len(self.path) >  1 and tname == self.path[-2]: |  | ||||||
|                             # handle case of dangling missing end |  | ||||||
|                             taginfo = ('end', self.path[-1], None) |  | ||||||
|                             htmlstr += self.processtag(taginfo) |  | ||||||
|                             print "     - fixed by injecting end tag ", self.path[-1] |  | ||||||
|                             self.path.pop() |  | ||||||
|                     self.path.pop() |  | ||||||
|  |  | ||||||
|                 if tname == 'removeme:{0}'.format(tname): |  | ||||||
|                     if ttype in ('begin', 'single', 'single_ext'): |  | ||||||
|                         skip = True |  | ||||||
|                     else: |  | ||||||
|                         skip = False |  | ||||||
|                 else: |  | ||||||
|                     taginfo = (ttype, tname, tattr) |  | ||||||
|                     htmlstr += self.processtag(taginfo) |  | ||||||
|  |  | ||||||
|                 # handle potential issue of multiple html, head, and body sections |  | ||||||
|                 if tname == 'html' and ttype == 'begin' and not html_done: |  | ||||||
|                     htmlstr += '\n' |  | ||||||
|                     html_done = True |  | ||||||
|  |  | ||||||
|                 if tname == 'head' and ttype == 'begin' and not head_done: |  | ||||||
|                     htmlstr += '\n' |  | ||||||
|                     # also add in metadata and style link tags |  | ||||||
|                     htmlstr += self.meta |  | ||||||
|                     htmlstr += '<link href="styles.css" rel="stylesheet" type="text/css" />\n' |  | ||||||
|                     head_done = True |  | ||||||
|  |  | ||||||
|                 if tname == 'body' and ttype == 'begin' and not body_done: |  | ||||||
|                     htmlstr += '\n' |  | ||||||
|                     body_done = True |  | ||||||
|  |  | ||||||
|         # handle issue of possibly missing html, head, and body tags |  | ||||||
|         # I have not seen this but the original did something like this so ... |  | ||||||
|         if not body_done: |  | ||||||
|             htmlstr = '<body>\n' + htmlstr + '</body>\n' |  | ||||||
|         if not head_done: |  | ||||||
|             headstr = '<head>\n' |  | ||||||
|             headstr += self.meta |  | ||||||
|             headstr += '<link href="styles.css" rel="stylesheet" type="text/css" />\n' |  | ||||||
|             headstr += '</head>\n' |  | ||||||
|             htmlstr = headstr + htmlstr |  | ||||||
|         if not html_done: |  | ||||||
|             htmlstr = '<html>\n' + htmlstr + '</html>\n' |  | ||||||
|  |  | ||||||
|         # finally add DOCTYPE info |  | ||||||
|         htmlstr = '<?xml version="1.0"?>\n<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n' + htmlstr |  | ||||||
|  |  | ||||||
|         css = self.base_css_rules |  | ||||||
|         for cls, rule in self.tag_css_rules.items(): |  | ||||||
|             css += '.%s { %s }\n' % (cls, rule) |  | ||||||
|  |  | ||||||
|         return (htmlstr, css, self.cssname) |  | ||||||
|  |  | ||||||
|     def ensure_unit(self, raw, unit='px'): |  | ||||||
|         if re.search(r'\d+$', raw) is not None: |  | ||||||
|             raw += unit |  | ||||||
|         return raw |  | ||||||
|  |  | ||||||
|     # flatten possibly modified tag back to string |  | ||||||
|     def taginfo_tostring(self, taginfo): |  | ||||||
|         (ttype, tname, tattr) = taginfo |  | ||||||
|         if ttype is None or tname is None: |  | ||||||
|             return '' |  | ||||||
|         if ttype == 'end': |  | ||||||
|             return '</%s>' % tname |  | ||||||
|         if ttype in SPECIAL_HANDLING_TYPES and tattr is not None and 'special' in tattr.keys(): |  | ||||||
|             info = tattr['special'] |  | ||||||
|             if ttype == 'comment': |  | ||||||
|                 return '<%s %s-->' % tname, info |  | ||||||
|             else: |  | ||||||
|                 return '<%s %s>' % tname, info |  | ||||||
|         res = [] |  | ||||||
|         res.append('<%s' % tname) |  | ||||||
|         if tattr is not None: |  | ||||||
|             for key in tattr.keys(): |  | ||||||
|                 res.append(' %s="%s"' % (key, tattr[key])) |  | ||||||
|         if ttype == 'single': |  | ||||||
|             res.append('/>') |  | ||||||
|         elif ttype == 'single_ext': |  | ||||||
|             res.append(' />') |  | ||||||
|         else : |  | ||||||
|             res.append('>') |  | ||||||
|         return "".join(res) |  | ||||||
|  |  | ||||||
|     # routines to convert from mobi ml tags atributes to xhtml attributes and styles |  | ||||||
|     def processtag(self, taginfo): |  | ||||||
|         # Converting mobi font sizes to numerics |  | ||||||
|         size_map = { |  | ||||||
|             'xx-small': '1', |  | ||||||
|             'x-small': '2', |  | ||||||
|             'small': '3', |  | ||||||
|             'medium': '4', |  | ||||||
|             'large': '5', |  | ||||||
|             'x-large': '6', |  | ||||||
|             'xx-large': '7', |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|         size_to_em_map = { |  | ||||||
|             '1': '.65em', |  | ||||||
|             '2': '.75em', |  | ||||||
|             '3': '1em', |  | ||||||
|             '4': '1.125em', |  | ||||||
|             '5': '1.25em', |  | ||||||
|             '6': '1.5em', |  | ||||||
|             '7': '2em', |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|         # current tag to work on |  | ||||||
|         (ttype, tname, tattr) = taginfo |  | ||||||
|         if not tattr: |  | ||||||
|             tattr = {} |  | ||||||
|  |  | ||||||
|         styles = [] |  | ||||||
|  |  | ||||||
|         if tname is None or tname.startswith('removeme'): |  | ||||||
|             return '' |  | ||||||
|  |  | ||||||
|         # have not seen an example of this yet so keep it here to be safe |  | ||||||
|         # until this is better understood |  | ||||||
|         if tname in ('country-region', 'place', 'placetype', 'placename', |  | ||||||
|                 'state', 'city', 'street', 'address', 'content'): |  | ||||||
|             tname = 'div' if tname == 'content' else 'span' |  | ||||||
|             for key in tattr.keys(): |  | ||||||
|                 tattr.pop(key) |  | ||||||
|  |  | ||||||
|         # handle general case of style, height, width, bgcolor in any tag |  | ||||||
|         if 'style' in tattr.keys(): |  | ||||||
|             style = tattr.pop('style').strip() |  | ||||||
|             if style: |  | ||||||
|                 styles.append(style) |  | ||||||
|  |  | ||||||
|         if 'align' in tattr.keys(): |  | ||||||
|             align = tattr.pop('align').strip() |  | ||||||
|             if align: |  | ||||||
|                 if tname in ('table', 'td', 'tr'): |  | ||||||
|                     pass |  | ||||||
|                 else: |  | ||||||
|                     styles.append('text-align: %s' % align) |  | ||||||
|  |  | ||||||
|         if 'height' in tattr.keys(): |  | ||||||
|             height = tattr.pop('height').strip() |  | ||||||
|             if height and '<' not in height and '>' not in height and re.search(r'\d+', height): |  | ||||||
|                 if tname in ('table', 'td', 'tr'): |  | ||||||
|                     pass |  | ||||||
|                 elif tname == 'img': |  | ||||||
|                     tattr['height'] = height |  | ||||||
|                 else: |  | ||||||
|                     styles.append('margin-top: %s' % self.ensure_unit(height)) |  | ||||||
|  |  | ||||||
|         if 'width' in tattr.keys(): |  | ||||||
|             width = tattr.pop('width').strip() |  | ||||||
|             if width and re.search(r'\d+', width): |  | ||||||
|                 if tname in ('table', 'td', 'tr'): |  | ||||||
|                     pass |  | ||||||
|                 elif tname == 'img': |  | ||||||
|                     tattr['width'] =  width |  | ||||||
|                 else: |  | ||||||
|                     styles.append('text-indent: %s' % self.ensure_unit(width)) |  | ||||||
|                     if width.startswith('-'): |  | ||||||
|                         styles.append('margin-left: %s' % self.ensure_unit(width[1:])) |  | ||||||
|  |  | ||||||
|         if 'bgcolor' in tattr.keys(): |  | ||||||
|             # no proprietary html allowed |  | ||||||
|             if tname == 'div': |  | ||||||
|                 del tattr['bgcolor'] |  | ||||||
|  |  | ||||||
|         elif tname == 'font': |  | ||||||
|             # Change font tags to span tags |  | ||||||
|             tname = 'span' |  | ||||||
|             if ttype in ('begin', 'single', 'single_ext'): |  | ||||||
|                 # move the face attribute to css font-family |  | ||||||
|                 if 'face' in tattr.keys(): |  | ||||||
|                     face = tattr.pop('face').strip() |  | ||||||
|                     styles.append('font-family: "%s"' % face) |  | ||||||
|  |  | ||||||
|                     # Monitor the constantly changing font sizes, change them to ems and move |  | ||||||
|                     # them to css. The following will work for 'flat' font tags, but nested font tags |  | ||||||
|                     # will cause things to go wonky. Need to revert to the parent font tag's size |  | ||||||
|                     # when a closing tag is encountered. |  | ||||||
|                 if 'size' in tattr.keys(): |  | ||||||
|                     sz = tattr.pop('size').strip().lower() |  | ||||||
|                     try: |  | ||||||
|                         float(sz) |  | ||||||
|                     except ValueError: |  | ||||||
|                         if sz in size_map.keys(): |  | ||||||
|                             sz = size_map[sz] |  | ||||||
|                     else: |  | ||||||
|                         if sz.startswith('-') or sz.startswith('+'): |  | ||||||
|                             sz = self.current_font_size + float(sz) |  | ||||||
|                             if sz > 7: |  | ||||||
|                                 sz = 7 |  | ||||||
|                             elif sz < 1: |  | ||||||
|                                 sz = 1 |  | ||||||
|                             sz = str(int(sz)) |  | ||||||
|                     styles.append('font-size: %s' % size_to_em_map[sz]) |  | ||||||
|                     self.current_font_size = int(sz) |  | ||||||
|  |  | ||||||
|         elif tname == 'img': |  | ||||||
|             for attr in ('width', 'height'): |  | ||||||
|                 if attr in tattr: |  | ||||||
|                     val = tattr[attr] |  | ||||||
|                     if val.lower().endswith('em'): |  | ||||||
|                         try: |  | ||||||
|                             nval = float(val[:-2]) |  | ||||||
|                             nval *= 16 * (168.451/72)  # Assume this was set using the Kindle profile |  | ||||||
|                             tattr[attr] = "%dpx"%int(nval) |  | ||||||
|                         except: |  | ||||||
|                             del tattr[attr] |  | ||||||
|                     elif val.lower().endswith('%'): |  | ||||||
|                         del tattr[attr] |  | ||||||
|  |  | ||||||
|         # convert the anchor tags |  | ||||||
|         if 'filepos-id' in tattr: |  | ||||||
|             tattr['id'] = tattr.pop('filepos-id') |  | ||||||
|             if 'name' in tattr and tattr['name'] != tattr['id']: |  | ||||||
|                 tattr['name'] = tattr['id'] |  | ||||||
|  |  | ||||||
|         if 'filepos' in tattr: |  | ||||||
|             filepos = tattr.pop('filepos') |  | ||||||
|             try: |  | ||||||
|                 tattr['href'] = "#filepos%d" % int(filepos) |  | ||||||
|             except ValueError: |  | ||||||
|                 pass |  | ||||||
|  |  | ||||||
|         if styles: |  | ||||||
|             ncls = None |  | ||||||
|             rule = '; '.join(styles) |  | ||||||
|             for sel, srule in self.tag_css_rules.items(): |  | ||||||
|                 if srule == rule: |  | ||||||
|                     ncls = sel |  | ||||||
|                     break |  | ||||||
|             if ncls is None: |  | ||||||
|                 self.tag_css_rule_cnt += 1 |  | ||||||
|                 ncls = 'rule_%d' % self.tag_css_rule_cnt |  | ||||||
|                 self.tag_css_rules[ncls] = rule |  | ||||||
|             cls = tattr.get('class', '') |  | ||||||
|             cls = cls + (' ' if cls else '') + ncls |  | ||||||
|             tattr['class'] = cls |  | ||||||
|  |  | ||||||
|         # convert updated tag back to string representation |  | ||||||
|         if len(tattr) == 0: |  | ||||||
|             tattr = None |  | ||||||
|         taginfo = (ttype, tname, tattr) |  | ||||||
|         return self.taginfo_tostring(taginfo) |  | ||||||
|  |  | ||||||
| ''' main only left in for testing outside of plugin ''' |  | ||||||
|  |  | ||||||
| def main(argv=sys.argv): |  | ||||||
|     if len(argv) != 2: |  | ||||||
|         return 1 |  | ||||||
|     else: |  | ||||||
|         infile = argv[1] |  | ||||||
|  |  | ||||||
|     try: |  | ||||||
|         print 'Converting Mobi Markup Language to XHTML' |  | ||||||
|         mlc = MobiMLConverter(infile) |  | ||||||
|         print 'Processing ...' |  | ||||||
|         htmlstr, css, cssname = mlc.processml() |  | ||||||
|         outname = infile.rsplit('.',1)[0] + '_converted.html' |  | ||||||
|         file(outname, 'wb').write(htmlstr) |  | ||||||
|         file(cssname, 'wb').write(css) |  | ||||||
|         print 'Completed' |  | ||||||
|         print 'XHTML version of book can be found at: ' + outname |  | ||||||
|  |  | ||||||
|     except ValueError, e: |  | ||||||
|         print "Error: %s" % e |  | ||||||
|         return 1 |  | ||||||
|  |  | ||||||
|     return 0 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == "__main__": |  | ||||||
|     sys.exit(main()) |  | ||||||
| @@ -429,15 +429,15 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow): | |||||||
|         logger.info( |         logger.info( | ||||||
|             'Attempting to open: ' + ', '.join(file_paths)) |             'Attempting to open: ' + ', '.join(file_paths)) | ||||||
|  |  | ||||||
|         contents = sorter.BookSorter( |         contents, errors = sorter.BookSorter( | ||||||
|             file_paths, |             file_paths, | ||||||
|             ('reading', None), |             ('reading', None), | ||||||
|             self.database_path, |             self.database_path, | ||||||
|             self.settings, |             self.settings, | ||||||
|             self.temp_dir.path()).initiate_threads() |             self.temp_dir.path()).initiate_threads() | ||||||
|  |  | ||||||
|         # TODO |         if errors: | ||||||
|         # Notification feedback in case all books return nothing |             self.display_error_notification(errors) | ||||||
|  |  | ||||||
|         if not contents: |         if not contents: | ||||||
|             logger.error('No parseable files found') |             logger.error('No parseable files found') | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user