440 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			440 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | |
| # -*- coding: utf-8 -*-
 | |
| # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
 | |
| 
 | |
| from __future__ import unicode_literals, division, absolute_import, print_function
 | |
| 
 | |
| from .compatibility_utils import PY2, utf8_str
 | |
| 
 | |
| if PY2:
 | |
|     range = xrange
 | |
| 
 | |
| import re
 | |
| # note: re requites the pattern to be the exact same type as the data to be searched in python3
 | |
| # but u"" is not allowed for the pattern itself only b""
 | |
| 
 | |
| from .mobi_utils import fromBase32
 | |
| 
 | |
| class HTMLProcessor:
 | |
| 
 | |
|     def __init__(self, files, metadata, rscnames):
 | |
|         self.files = files
 | |
|         self.metadata = metadata
 | |
|         self.rscnames = rscnames
 | |
|         # for original style mobis, default to including all image files in the opf manifest
 | |
|         self.used = {}
 | |
|         for name in rscnames:
 | |
|             self.used[name] = 'used'
 | |
| 
 | |
|     def findAnchors(self, rawtext, indx_data, positionMap):
 | |
|         # process the raw text
 | |
|         # find anchors...
 | |
|         print("Find link anchors")
 | |
|         link_pattern = re.compile(br'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', re.IGNORECASE)
 | |
|         # TEST NCX: merge in filepos from indx
 | |
|         pos_links = [int(m.group(1)) for m in link_pattern.finditer(rawtext)]
 | |
|         if indx_data:
 | |
|             pos_indx = [e['pos'] for e in indx_data if e['pos']>0]
 | |
|             pos_links = list(set(pos_links + pos_indx))
 | |
| 
 | |
|         for position in pos_links:
 | |
|             if position in positionMap:
 | |
|                 positionMap[position] = positionMap[position] + utf8_str('<a id="filepos%d" />' % position)
 | |
|             else:
 | |
|                 positionMap[position] = utf8_str('<a id="filepos%d" />' % position)
 | |
| 
 | |
|         # apply dictionary metadata and anchors
 | |
|         print("Insert data into html")
 | |
|         pos = 0
 | |
|         lastPos = len(rawtext)
 | |
|         dataList = []
 | |
|         for end in sorted(positionMap.keys()):
 | |
|             if end == 0 or end > lastPos:
 | |
|                 continue  # something's up - can't put a tag in outside <html>...</html>
 | |
|             dataList.append(rawtext[pos:end])
 | |
|             dataList.append(positionMap[end])
 | |
|             pos = end
 | |
|         dataList.append(rawtext[pos:])
 | |
|         srctext = b"".join(dataList)
 | |
|         rawtext = None
 | |
|         dataList = None
 | |
|         self.srctext = srctext
 | |
|         self.indx_data = indx_data
 | |
|         return srctext
 | |
| 
 | |
|     def insertHREFS(self):
 | |
|         srctext = self.srctext
 | |
|         rscnames = self.rscnames
 | |
|         metadata = self.metadata
 | |
| 
 | |
|         # put in the hrefs
 | |
|         print("Insert hrefs into html")
 | |
|         # There doesn't seem to be a standard, so search as best as we can
 | |
| 
 | |
|         link_pattern = re.compile(br'''<a([^>]*?)filepos=['"]{0,1}0*(\d+)['"]{0,1}([^>]*?)>''', re.IGNORECASE)
 | |
|         srctext = link_pattern.sub(br'''<a\1href="#filepos\2"\3>''', srctext)
 | |
| 
 | |
|         # remove empty anchors
 | |
|         print("Remove empty anchors from html")
 | |
|         srctext = re.sub(br"<a\s*/>",br"", srctext)
 | |
|         srctext = re.sub(br"<a\s*>\s*</a>",br"", srctext)
 | |
| 
 | |
|         # convert image references
 | |
|         print("Insert image references into html")
 | |
|         # split string into image tag pieces and other pieces
 | |
|         image_pattern = re.compile(br'''(<img.*?>)''', re.IGNORECASE)
 | |
|         image_index_pattern = re.compile(br'''recindex=['"]{0,1}([0-9]+)['"]{0,1}''', re.IGNORECASE)
 | |
|         srcpieces = image_pattern.split(srctext)
 | |
|         srctext = self.srctext = None
 | |
| 
 | |
|         # all odd pieces are image tags (nulls string on even pieces if no space between them in srctext)
 | |
|         for i in range(1, len(srcpieces), 2):
 | |
|             tag = srcpieces[i]
 | |
|             for m in image_index_pattern.finditer(tag):
 | |
|                 imageNumber = int(m.group(1))
 | |
|                 imageName = rscnames[imageNumber-1]
 | |
|                 if imageName is None:
 | |
|                     print("Error: Referenced image %s was not recognized as a valid image" % imageNumber)
 | |
|                 else:
 | |
|                     replacement = b'src="Images/' + utf8_str(imageName) + b'"'
 | |
|                     tag = image_index_pattern.sub(replacement, tag, 1)
 | |
|             srcpieces[i] = tag
 | |
|         srctext = b"".join(srcpieces)
 | |
| 
 | |
|         # add in character set meta into the html header if needed
 | |
|         if 'Codec' in metadata:
 | |
|             srctext = srctext[0:12]+b'<meta http-equiv="content-type" content="text/html; charset='+utf8_str(metadata.get('Codec')[0])+b'" />'+srctext[12:]
 | |
|         return srctext, self.used
 | |
| 
 | |
| 
 | |
| class XHTMLK8Processor:
 | |
| 
 | |
|     def __init__(self, rscnames, k8proc):
 | |
|         self.rscnames = rscnames
 | |
|         self.k8proc = k8proc
 | |
|         self.used = {}
 | |
| 
 | |
|     def buildXHTML(self):
 | |
| 
 | |
|         # first need to update all links that are internal which
 | |
|         # are based on positions within the xhtml files **BEFORE**
 | |
|         # cutting and pasting any pieces into the xhtml text files
 | |
| 
 | |
|         #   kindle:pos:fid:XXXX:off:YYYYYYYYYY  (used for internal link within xhtml)
 | |
|         #       XXXX is the offset in records into divtbl
 | |
|         #       YYYYYYYYYYYY is a base32 number you add to the divtbl insertpos to get final position
 | |
| 
 | |
|         # pos:fid pattern
 | |
|         posfid_pattern = re.compile(br'''(<a.*?href=.*?>)''', re.IGNORECASE)
 | |
|         posfid_index_pattern = re.compile(br'''['"]kindle:pos:fid:([0-9|A-V]+):off:([0-9|A-V]+).*?["']''')
 | |
| 
 | |
|         parts = []
 | |
|         print("Building proper xhtml for each file")
 | |
|         for i in range(self.k8proc.getNumberOfParts()):
 | |
|             part = self.k8proc.getPart(i)
 | |
|             [partnum, dir, filename, beg, end, aidtext] = self.k8proc.getPartInfo(i)
 | |
| 
 | |
|             # internal links
 | |
|             srcpieces = posfid_pattern.split(part)
 | |
|             for j in range(1, len(srcpieces),2):
 | |
|                 tag = srcpieces[j]
 | |
|                 if tag.startswith(b'<'):
 | |
|                     for m in posfid_index_pattern.finditer(tag):
 | |
|                         posfid = m.group(1)
 | |
|                         offset = m.group(2)
 | |
|                         filename, idtag = self.k8proc.getIDTagByPosFid(posfid, offset)
 | |
|                         if idtag == b'':
 | |
|                             replacement= b'"' + utf8_str(filename) + b'"'
 | |
|                         else:
 | |
|                             replacement = b'"' + utf8_str(filename) + b'#' + idtag + b'"'
 | |
|                         tag = posfid_index_pattern.sub(replacement, tag, 1)
 | |
|                     srcpieces[j] = tag
 | |
|             part = b"".join(srcpieces)
 | |
|             parts.append(part)
 | |
| 
 | |
|         # we are free to cut and paste as we see fit
 | |
|         # we can safely remove all of the Kindlegen generated aid tags
 | |
|         # change aid ids that are in k8proc.linked_aids to xhtml ids
 | |
|         find_tag_with_aid_pattern = re.compile(br'''(<[^>]*\said\s*=[^>]*>)''', re.IGNORECASE)
 | |
|         within_tag_aid_position_pattern = re.compile(br'''\said\s*=['"]([^'"]*)['"]''')
 | |
|         for i in range(len(parts)):
 | |
|             part = parts[i]
 | |
|             srcpieces = find_tag_with_aid_pattern.split(part)
 | |
|             for j in range(len(srcpieces)):
 | |
|                 tag = srcpieces[j]
 | |
|                 if tag.startswith(b'<'):
 | |
|                     for m in within_tag_aid_position_pattern.finditer(tag):
 | |
|                         try:
 | |
|                             aid = m.group(1)
 | |
|                         except IndexError:
 | |
|                             aid = None
 | |
|                         replacement = b''
 | |
|                         if aid in self.k8proc.linked_aids:
 | |
|                             replacement = b' id="aid-' + aid + b'"'
 | |
|                         tag = within_tag_aid_position_pattern.sub(replacement, tag, 1)
 | |
|                     srcpieces[j] = tag
 | |
|             part = b"".join(srcpieces)
 | |
|             parts[i] = part
 | |
| 
 | |
|         # we can safely replace all of the Kindlegen generated data-AmznPageBreak tags
 | |
|         # with page-break-after style patterns
 | |
|         find_tag_with_AmznPageBreak_pattern = re.compile(br'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE)
 | |
|         within_tag_AmznPageBreak_position_pattern = re.compile(br'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''')
 | |
|         for i in range(len(parts)):
 | |
|             part = parts[i]
 | |
|             srcpieces = find_tag_with_AmznPageBreak_pattern.split(part)
 | |
|             for j in range(len(srcpieces)):
 | |
|                 tag = srcpieces[j]
 | |
|                 if tag.startswith(b'<'):
 | |
|                     srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub(
 | |
|                         lambda m:b' style="page-break-after:' + m.group(1) + b'"', tag)
 | |
|             part = b"".join(srcpieces)
 | |
|             parts[i] = part
 | |
| 
 | |
|         # we have to handle substitutions for the flows  pieces first as they may
 | |
|         # be inlined into the xhtml text
 | |
|         #   kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
 | |
|         #   kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
 | |
|         #   kindle:embed:XXXX   (used for fonts)
 | |
| 
 | |
|         flows = []
 | |
|         flows.append(None)
 | |
|         flowinfo = []
 | |
|         flowinfo.append([None, None, None, None])
 | |
| 
 | |
|         # regular expression search patterns
 | |
|         img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
 | |
|         img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE)
 | |
| 
 | |
|         tag_pattern = re.compile(br'''(<[^>]*>)''')
 | |
|         flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
 | |
| 
 | |
|         url_pattern = re.compile(br'''(url\(.*?\))''', re.IGNORECASE)
 | |
|         url_img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)\?mime=image/[^\)]*["')]''', re.IGNORECASE)
 | |
|         font_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)["')]''', re.IGNORECASE)
 | |
|         url_css_index_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE)
 | |
|         url_svg_image_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=image/svg\+xml[^\)]*''', re.IGNORECASE)
 | |
| 
 | |
|         for i in range(1, self.k8proc.getNumberOfFlows()):
 | |
|             [ftype, format, dir, filename] = self.k8proc.getFlowInfo(i)
 | |
|             flowpart = self.k8proc.getFlow(i)
 | |
| 
 | |
|             # links to raster image files from image tags
 | |
|             # image_pattern
 | |
|             srcpieces = img_pattern.split(flowpart)
 | |
|             for j in range(1, len(srcpieces),2):
 | |
|                 tag = srcpieces[j]
 | |
|                 if tag.startswith(b'<im'):
 | |
|                     for m in img_index_pattern.finditer(tag):
 | |
|                         imageNumber = fromBase32(m.group(1))
 | |
|                         imageName = self.rscnames[imageNumber-1]
 | |
|                         if imageName is not None:
 | |
|                             replacement = b'"../Images/' + utf8_str(imageName) + b'"'
 | |
|                             self.used[imageName] = 'used'
 | |
|                             tag = img_index_pattern.sub(replacement, tag, 1)
 | |
|                         else:
 | |
|                             print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
 | |
|                     srcpieces[j] = tag
 | |
|             flowpart = b"".join(srcpieces)
 | |
| 
 | |
|             # replacements inside css url():
 | |
|             srcpieces = url_pattern.split(flowpart)
 | |
|             for j in range(1, len(srcpieces),2):
 | |
|                 tag = srcpieces[j]
 | |
| 
 | |
|                 #  process links to raster image files
 | |
|                 for m in url_img_index_pattern.finditer(tag):
 | |
|                     imageNumber = fromBase32(m.group(1))
 | |
|                     imageName = self.rscnames[imageNumber-1]
 | |
|                     osep = m.group()[0:1]
 | |
|                     csep = m.group()[-1:]
 | |
|                     if imageName is not None:
 | |
|                         replacement = osep +  b'../Images/' + utf8_str(imageName) +  csep
 | |
|                         self.used[imageName] = 'used'
 | |
|                         tag = url_img_index_pattern.sub(replacement, tag, 1)
 | |
|                     else:
 | |
|                         print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
 | |
| 
 | |
|                 # process links to fonts
 | |
|                 for m in font_index_pattern.finditer(tag):
 | |
|                     fontNumber = fromBase32(m.group(1))
 | |
|                     fontName = self.rscnames[fontNumber-1]
 | |
|                     osep = m.group()[0:1]
 | |
|                     csep = m.group()[-1:]
 | |
|                     if fontName is None:
 | |
|                         print("Error: Referenced font %s was not recognized as a valid font in %s" % (fontNumber, tag))
 | |
|                     else:
 | |
|                         replacement = osep +  b'../Fonts/' + utf8_str(fontName) +  csep
 | |
|                         tag = font_index_pattern.sub(replacement, tag, 1)
 | |
|                         self.used[fontName] = 'used'
 | |
| 
 | |
|                 # process links to other css pieces
 | |
|                 for m in url_css_index_pattern.finditer(tag):
 | |
|                     num = fromBase32(m.group(1))
 | |
|                     [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
 | |
|                     replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
 | |
|                     tag = url_css_index_pattern.sub(replacement, tag, 1)
 | |
|                     self.used[fnm] = 'used'
 | |
| 
 | |
|                 # process links to svg images
 | |
|                 for m in url_svg_image_pattern.finditer(tag):
 | |
|                     num = fromBase32(m.group(1))
 | |
|                     [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
 | |
|                     replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
 | |
|                     tag = url_svg_image_pattern.sub(replacement, tag, 1)
 | |
|                     self.used[fnm] = 'used'
 | |
| 
 | |
|                 srcpieces[j] = tag
 | |
|             flowpart = b"".join(srcpieces)
 | |
| 
 | |
|             # store away in our own copy
 | |
|             flows.append(flowpart)
 | |
| 
 | |
|             # I do not think this case exists and even if it does exist, it needs to be done in a separate
 | |
|             # pass to prevent inlining a flow piece into another flow piece before the inserted one or the
 | |
|             # target one has been fully processed
 | |
| 
 | |
|             # but keep it around if it ends up we do need it
 | |
| 
 | |
|             # flow pattern not inside url()
 | |
|             # srcpieces = tag_pattern.split(flowpart)
 | |
|             # for j in range(1, len(srcpieces),2):
 | |
|             #     tag = srcpieces[j]
 | |
|             #     if tag.startswith(b'<'):
 | |
|             #         for m in flow_pattern.finditer(tag):
 | |
|             #             num = fromBase32(m.group(1))
 | |
|             #             [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
 | |
|             #             flowtext = self.k8proc.getFlow(num)
 | |
|             #             if fmt == b'inline':
 | |
|             #                 tag = flowtext
 | |
|             #             else:
 | |
|             #                 replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
 | |
|             #                 tag = flow_pattern.sub(replacement, tag, 1)
 | |
|             #                 self.used[fnm] = 'used'
 | |
|             #         srcpieces[j] = tag
 | |
|             # flowpart = b"".join(srcpieces)
 | |
| 
 | |
|         # now handle the main text xhtml parts
 | |
| 
 | |
|         # Handle the flow items in the XHTML text pieces
 | |
|         # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
 | |
|         tag_pattern = re.compile(br'''(<[^>]*>)''')
 | |
|         flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
 | |
|         for i in range(len(parts)):
 | |
|             part = parts[i]
 | |
|             [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
 | |
|             # flow pattern
 | |
|             srcpieces = tag_pattern.split(part)
 | |
|             for j in range(1, len(srcpieces),2):
 | |
|                 tag = srcpieces[j]
 | |
|                 if tag.startswith(b'<'):
 | |
|                     for m in flow_pattern.finditer(tag):
 | |
|                         num = fromBase32(m.group(1))
 | |
|                         if num > 0 and num < len(self.k8proc.flowinfo):
 | |
|                             [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
 | |
|                             flowpart = flows[num]
 | |
|                             if fmt == b'inline':
 | |
|                                 tag = flowpart
 | |
|                             else:
 | |
|                                 replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
 | |
|                                 tag = flow_pattern.sub(replacement, tag, 1)
 | |
|                                 self.used[fnm] = 'used'
 | |
|                         else:
 | |
|                             print("warning: ignoring non-existent flow link", tag, " value 0x%x" % num)
 | |
|                     srcpieces[j] = tag
 | |
|             part = b''.join(srcpieces)
 | |
| 
 | |
|             # store away modified version
 | |
|             parts[i] = part
 | |
| 
 | |
|         # Handle any embedded raster images links in style= attributes urls
 | |
|         style_pattern = re.compile(br'''(<[a-zA-Z0-9]+\s[^>]*style\s*=\s*[^>]*>)''', re.IGNORECASE)
 | |
|         img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE)
 | |
| 
 | |
|         for i in range(len(parts)):
 | |
|             part = parts[i]
 | |
|             [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
 | |
| 
 | |
|             # replace urls in style attributes
 | |
|             srcpieces = style_pattern.split(part)
 | |
|             for j in range(1, len(srcpieces),2):
 | |
|                 tag = srcpieces[j]
 | |
|                 if b'kindle:embed' in tag:
 | |
|                     for m in img_index_pattern.finditer(tag):
 | |
|                         imageNumber = fromBase32(m.group(1))
 | |
|                         imageName = self.rscnames[imageNumber-1]
 | |
|                         osep = m.group()[0:1]
 | |
|                         csep = m.group()[-1:]
 | |
|                         if imageName is not None:
 | |
|                             replacement = osep + b'../Images/'+ utf8_str(imageName) + csep
 | |
|                             self.used[imageName] = 'used'
 | |
|                             tag = img_index_pattern.sub(replacement, tag, 1)
 | |
|                         else:
 | |
|                             print("Error: Referenced image %s in style url was not recognized in %s" % (imageNumber, tag))
 | |
|                     srcpieces[j] = tag
 | |
|             part = b"".join(srcpieces)
 | |
| 
 | |
|             # store away modified version
 | |
|             parts[i] = part
 | |
| 
 | |
|         # Handle any embedded raster images links in the xhtml text
 | |
|         # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
 | |
|         img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
 | |
|         img_index_pattern = re.compile(br'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''')
 | |
| 
 | |
|         for i in range(len(parts)):
 | |
|             part = parts[i]
 | |
|             [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
 | |
| 
 | |
|             # links to raster image files
 | |
|             # image_pattern
 | |
|             srcpieces = img_pattern.split(part)
 | |
|             for j in range(1, len(srcpieces),2):
 | |
|                 tag = srcpieces[j]
 | |
|                 if tag.startswith(b'<im'):
 | |
|                     for m in img_index_pattern.finditer(tag):
 | |
|                         imageNumber = fromBase32(m.group(1))
 | |
|                         imageName = self.rscnames[imageNumber-1]
 | |
|                         if imageName is not None:
 | |
|                             replacement = b'"../Images/' + utf8_str(imageName) + b'"'
 | |
|                             self.used[imageName] = 'used'
 | |
|                             tag = img_index_pattern.sub(replacement, tag, 1)
 | |
|                         else:
 | |
|                             print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
 | |
|                     srcpieces[j] = tag
 | |
|             part = b"".join(srcpieces)
 | |
|             # store away modified version
 | |
|             parts[i] = part
 | |
| 
 | |
|         # finally perform any general cleanups needed to make valid XHTML
 | |
|         # these include:
 | |
|         #   in svg tags replace "perserveaspectratio" attributes with "perserveAspectRatio"
 | |
|         #   in svg tags replace "viewbox" attributes with "viewBox"
 | |
|         #   in <li> remove value="XX" attributes since these are illegal
 | |
|         tag_pattern = re.compile(br'''(<[^>]*>)''')
 | |
|         li_value_pattern = re.compile(br'''\svalue\s*=\s*['"][^'"]*['"]''', re.IGNORECASE)
 | |
| 
 | |
|         for i in range(len(parts)):
 | |
|             part = parts[i]
 | |
|             [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
 | |
| 
 | |
|             # tag pattern
 | |
|             srcpieces = tag_pattern.split(part)
 | |
|             for j in range(1, len(srcpieces),2):
 | |
|                 tag = srcpieces[j]
 | |
|                 if tag.startswith(b'<svg') or tag.startswith(b'<SVG'):
 | |
|                     tag = tag.replace(b'preserveaspectratio',b'preserveAspectRatio')
 | |
|                     tag = tag.replace(b'viewbox',b'viewBox')
 | |
|                 elif tag.startswith(b'<li ') or tag.startswith(b'<LI '):
 | |
|                     tagpieces = li_value_pattern.split(tag)
 | |
|                     tag = b"".join(tagpieces)
 | |
|                 srcpieces[j] = tag
 | |
|             part = b"".join(srcpieces)
 | |
|             # store away modified version
 | |
|             parts[i] = part
 | |
| 
 | |
|         self.k8proc.setFlows(flows)
 | |
|         self.k8proc.setParts(parts)
 | |
| 
 | |
|         return self.used
 |