361 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			361 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import unicode_literals
 | |
| from __future__ import absolute_import
 | |
| from . import util
 | |
| from . import odict
 | |
| from . import inlinepatterns
 | |
| 
 | |
| 
 | |
def build_treeprocessors(md_instance, **kwargs):
    """ Assemble the stock treeprocessors for a Markdown instance.

    Keyword arguments:

    * md_instance: object handed unchanged to every processor constructor.
    * kwargs: accepted for call compatibility; not read here.

    Returns: an `odict.OrderedDict` holding the "inline" processor first
    and the "prettify" processor second.

    """
    registry = odict.OrderedDict()
    # Registration order is significant: entries are added in this order.
    defaults = (
        ("inline", InlineProcessor),
        ("prettify", PrettifyTreeprocessor),
    )
    for key, factory in defaults:
        registry[key] = factory(md_instance)
    return registry
 | |
| 
 | |
| 
 | |
def isString(s):
    """ Tell whether `s` is a plain (non-atomic) string.

    Returns False for any `util.AtomicString` instance, otherwise the
    result of `isinstance(s, util.string_type)`.

    """
    # AtomicString is deliberately reported as "not a string".
    if isinstance(s, util.AtomicString):
        return False
    return isinstance(s, util.string_type)
 | |
| 
 | |
| 
 | |
class Treeprocessor(util.Processor):
    """
    Base class for processors that run on the ElementTree object before
    serialization.

    A Treeprocessor provides one hook, `run`, which is handed a pointer to
    an ElementTree, modifies it as necessary and returns an ElementTree
    object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Hook to be implemented by subclasses; this base version does nothing.

        `root` is the root ElementTree.  An implementation may either return
        another ElementTree object, in which case the existing root is
        replaced, or modify the current tree in place and return None.
        """
        return None
 | |
| 
 | |
| 
 | |
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Matched inline constructs are replaced in the text by placeholders and
    the produced nodes are kept in `self.stashed_nodes`; the placeholders
    are later expanded back into ElementTree elements.

    `self.stashed_nodes` is created by `run`, so the private helpers below
    are only usable once `run` has started.
    """

    def __init__(self, md):
        """ Remember the Markdown instance and the placeholder settings. """
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        # The 4 matches the "%04d" id width used by __makePlaceholder.
        # This value is not read anywhere else in this class.
        self.__placeholder_length = (4 + len(self.__placeholder_prefix)
                                     + len(self.__placeholder_suffix))
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder

        Keyword arguments:

        * type: accepted for call compatibility; not used.

        Returns: tuple `(placeholder, id)` where `id` is the zero-padded
        count of nodes stashed so far and `placeholder` is
        `util.INLINE_PLACEHOLDER` formatted with that id.

        """
        nodeId = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % nodeId
        return placeholder, nodeId

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found
        placeholder.  When nothing matches, returns `(None, index + 1)`.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash

        Keyword arguments:

        * node: the object to remember (an Element or a string)
        * type: forwarded to __makePlaceholder

        Returns: the placeholder string that now stands for `node`.

        """
        placeholder, nodeId = self.__makePlaceholder(type)
        self.stashed_nodes[nodeId] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders.  An AtomicString is returned
        untouched.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.markdown.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.markdown.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # A pattern that matched is tried again on the new data;
                # only a miss moves on to the next pattern.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            # list(node) replaces Element.getchildren(), which was removed
            # in Python 3.9.
            pos = list(node).index(subnode)
            # NOTE(review): this detaches `subnode` from `node`, so only the
            # elements built from its tail are put back at its position.
            # Confirm that dropping `subnode` itself is intended.
            node.remove(subnode)
        else:
            pos = 0

        # Inserting the reversed list at a fixed position keeps the
        # original left-to-right order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        Plain text found between placeholders is not returned; it is
        attached to the tail of the last collected element or, when no
        element has been collected yet, to `parent.text`.

        """
        def linkText(text):
            """ Append `text` after the last result, or to parent.text. """
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                nodeId, phEndIndex = self.__findPlaceholder(data, index)

                if nodeId in self.stashed_nodes:
                    node = self.stashed_nodes.get(nodeId)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node):  # it's Element
                        # Concatenation builds a new list, so the loop is
                        # not disturbed when __processElementText changes
                        # the children of `node`.
                        for child in [node] + list(node):
                            if child.tail and child.tail.strip():
                                self.__processElementText(node, child, False)
                            if child.text and child.text.strip():
                                self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else:  # wrong placeholder
                    # Keep the prefix as literal text and resume the search
                    # right after it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                # Nothing left to scan; this ends the loop.
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple `(data, matched, startIndex)`:

        * no match: the unchanged data, False and 0;
        * match for which `pattern.handleMatch` returned None: the
          unchanged data, True and the offset at which the last regex
          group starts;
        * otherwise: the data with the match replaced by a placeholder,
          True and 0.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # span() is relative to data[startIndex:], hence the added
            # length of the skipped part.
            lastGroup = len(match.groups())
            return data, True, len(leftData) + match.span(lastGroup)[0]

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        # Text is handed to the patterns after this one.
                        child.text = self.__handleInline(child.text,
                                                         patternIndex + 1)
                    if child.tail:
                        # Tails are also offered to the current pattern.
                        child.tail = self.__handleInline(child.tail,
                                                         patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        # The first and last regex groups are kept around the placeholder
        # (presumably the text before and after the matched construct;
        # verify against the pattern's compiled regular expression).
        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # Iterate over a copy of the children: elements created from a
            # child's tail are inserted into currElement inside the loop.
            # list(elem) replaces Element.getchildren(), which was removed
            # in Python 3.9.
            for child in list(currElement):
                if child.text and not isinstance(child.text,
                                                 util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    # New elements are visited later for their own children.
                    stack += lst
                    insertQueue.append((child, lst))
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throwaway parent that collects the leading plain text
                    # of the tail.
                    dumby = util.etree.Element('d')
                    tailResult = self.__processPlaceholders(tail, dumby)
                    if dumby.text:
                        child.tail = dumby.text
                    else:
                        child.tail = None
                    pos = list(currElement).index(child) + 1
                    # Reversed insertion at a fixed position keeps order.
                    tailResult.reverse()
                    for newChild in tailResult:
                        currElement.insert(pos, newChild)
                if len(child):
                    stack.append(child)

            # Attach the elements created from each child's text only after
            # the whole level has been scanned.
            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text and isString(element.text):
                        element.text = inlinepatterns.handleAttributes(
                            element.text, element)
                for i, newChild in enumerate(lst):
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail and isString(newChild.tail):
                            # The tail is processed against the containing
                            # element, not against newChild.
                            newChild.tail = inlinepatterns.handleAttributes(
                                newChild.tail, element)
                        if newChild.text and isString(newChild.text):
                            newChild.text = inlinepatterns.handleAttributes(
                                newChild.text, newChild)
                    element.insert(i, newChild)
        return tree
 | |
| 
 | |
| 
 | |
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _iterTag(self, root, tag):
        """
        Return an iterable over `root` and its descendants named `tag`.

        `Element.getiterator()` was removed in Python 3.9, so `iter()` is
        used when available; `getiterator()` remains the fallback for
        ElementTree versions that predate `iter()`.
        """
        iterate = getattr(root, 'iter', None)
        if iterate is None:
            iterate = root.getiterator
        return iterate(tag)

    def _prettifyETree(self, elem):
        """
        Recursively add linebreaks to ElementTree children.

        For a block level element other than `code` and `pre`, blank text
        in front of a block level first child becomes a newline and block
        level children are processed recursively.  For any element, a
        missing or whitespace-only tail becomes a newline.
        """

        i = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = i
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # One check covers both block level and other elements.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """
        Add linebreaks to ElementTree root object.

        The tree is modified in place; nothing is returned.
        """

        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in self._iterTag(root, 'br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in self._iterTag(root, 'pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # A <code> element without text has text None; calling
                # rstrip() on it would raise AttributeError.
                if code.text is not None:
                    code.text = code.text.rstrip() + '\n'
 |