278 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			278 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # markdown/serializers.py
 | |
| #
 | |
| # Add x/html serialization to ElementTree
 | |
| # Taken from ElementTree 1.3 preview with slight modifications
 | |
| #
 | |
| # Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
 | |
| #
 | |
| # fredrik@pythonware.com
 | |
| # http://www.pythonware.com
 | |
| #
 | |
| # --------------------------------------------------------------------
 | |
| # The ElementTree toolkit is
 | |
| #
 | |
| # Copyright (c) 1999-2007 by Fredrik Lundh
 | |
| #
 | |
| # By obtaining, using, and/or copying this software and/or its
 | |
| # associated documentation, you agree that you have read, understood,
 | |
| # and will comply with the following terms and conditions:
 | |
| #
 | |
| # Permission to use, copy, modify, and distribute this software and
 | |
| # its associated documentation for any purpose and without fee is
 | |
| # hereby granted, provided that the above copyright notice appears in
 | |
| # all copies, and that both that copyright notice and this permission
 | |
| # notice appear in supporting documentation, and that the name of
 | |
| # Secret Labs AB or the author not be used in advertising or publicity
 | |
| # pertaining to distribution of the software without specific, written
 | |
| # prior permission.
 | |
| #
 | |
| # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
 | |
| # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
 | |
| # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
 | |
| # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
 | |
| # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 | |
| # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 | |
| # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 | |
| # OF THIS SOFTWARE.
 | |
| # --------------------------------------------------------------------
 | |
| 
 | |
| 
 | |
| from __future__ import absolute_import
 | |
| from __future__ import unicode_literals
 | |
| from . import util
 | |
# Local aliases for the ElementTree API exposed through ``util.etree``.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
# Use ``test_comment`` as the comment factory when the etree module
# provides that attribute; otherwise fall back to ``Comment``.
Comment = (
    util.etree.test_comment
    if hasattr(util.etree, 'test_comment')
    else util.etree.Comment
)
PI = util.etree.PI
ProcessingInstruction = util.etree.ProcessingInstruction

# Public API of this module.
__all__ = ['to_html_string', 'to_xhtml_string']
 | |
| 
 | |
# Tag names of HTML elements that are serialized without content or an
# end tag.  Kept as a set for fast membership tests.
# NOTE: every name needs its own comma -- adjacent string literals are
# silently concatenated ("meta" "param" would become "metaparam").
HTML_EMPTY = set((
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
))
 | |
| 
 | |
| _namespace_map = {
 | |
|     # "well-known" namespace prefixes
 | |
|     "http://www.w3.org/XML/1998/namespace": "xml",
 | |
|     "http://www.w3.org/1999/xhtml": "html",
 | |
|     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
 | |
|     "http://schemas.xmlsoap.org/wsdl/": "wsdl",
 | |
|     # xml schema
 | |
|     "http://www.w3.org/2001/XMLSchema": "xs",
 | |
|     "http://www.w3.org/2001/XMLSchema-instance": "xsi",
 | |
|     # dublic core
 | |
|     "http://purl.org/dc/elements/1.1/": "dc",
 | |
| }
 | |
| 
 | |
| 
 | |
| def _raise_serialization_error(text):
 | |
|     raise TypeError(
 | |
|         "cannot serialize %r (type %s)" % (text, type(text).__name__)
 | |
|         )
 | |
| 
 | |
| def _encode(text, encoding):
 | |
|     try:
 | |
|         return text.encode(encoding, "xmlcharrefreplace")
 | |
|     except (TypeError, AttributeError):
 | |
|         _raise_serialization_error(text)
 | |
| 
 | |
| def _escape_cdata(text):
 | |
|     # escape character data
 | |
|     try:
 | |
|         # it's worth avoiding do-nothing calls for strings that are
 | |
|         # shorter than 500 character, or so.  assume that's, by far,
 | |
|         # the most common case in most applications.
 | |
|         if "&" in text:
 | |
|             text = text.replace("&", "&")
 | |
|         if "<" in text:
 | |
|             text = text.replace("<", "<")
 | |
|         if ">" in text:
 | |
|             text = text.replace(">", ">")
 | |
|         return text
 | |
|     except (TypeError, AttributeError):
 | |
|         _raise_serialization_error(text)
 | |
| 
 | |
| 
 | |
| def _escape_attrib(text):
 | |
|     # escape attribute value
 | |
|     try:
 | |
|         if "&" in text:
 | |
|             text = text.replace("&", "&")
 | |
|         if "<" in text:
 | |
|             text = text.replace("<", "<")
 | |
|         if ">" in text:
 | |
|             text = text.replace(">", ">")
 | |
|         if "\"" in text:
 | |
|             text = text.replace("\"", """)
 | |
|         if "\n" in text:
 | |
|             text = text.replace("\n", "
")
 | |
|         return text
 | |
|     except (TypeError, AttributeError):
 | |
|         _raise_serialization_error(text)
 | |
| 
 | |
| def _escape_attrib_html(text):
 | |
|     # escape attribute value
 | |
|     try:
 | |
|         if "&" in text:
 | |
|             text = text.replace("&", "&")
 | |
|         if "<" in text:
 | |
|             text = text.replace("<", "<")
 | |
|         if ">" in text:
 | |
|             text = text.replace(">", ">")
 | |
|         if "\"" in text:
 | |
|             text = text.replace("\"", """)
 | |
|         return text
 | |
|     except (TypeError, AttributeError):
 | |
|         _raise_serialization_error(text)
 | |
| 
 | |
| 
 | |
def _serialize_html(write, elem, qnames, namespaces, format):
    """Serialize *elem* and its subtree as HTML or XHTML through *write*.

    Args:
        write: Callable taking one string; called once per output fragment.
        elem: The element to serialize.  Children are handled recursively
            and the element's tail text is written last.
        qnames: Mapping from tag and attribute names to their serialized
            form.  A tag mapped to ``None`` is not written itself; only
            its text and children are.
        namespaces: Mapping of namespace URI to prefix, written as
            ``xmlns`` declarations on this element, or ``None``.
            Recursive calls always pass ``None``, so declarations only
            appear on the element of the outermost call.
        format: ``"html"`` or ``"xhtml"``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                # sorted() instead of .sort(): items() may return a dict
                # view, which has no sort() method.
                items = sorted(items)  # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes: when the value equals
                        # the name, HTML output carries the name only
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # sort on prefix
                    ns_items = sorted(namespaces.items(), key=lambda x: x[1])
                    for v, k in ns_items:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            if format == "xhtml" and tag in HTML_EMPTY:
                # XHTML: empty elements are self-closing
                write(" />")
            else:
                write(">")
                tag = tag.lower()
                if text:
                    if tag == "script" or tag == "style":
                        # script/style content is written without escaping
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if tag not in HTML_EMPTY:
                    # empty elements get no end tag
                    write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
 | |
| 
 | |
def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at *root* and return the result.

    Args:
        root: Root element of the tree to serialize; must not be ``None``.
        encoding: When ``None`` (the default) the result is returned as a
            string.  Otherwise the result is passed through ``_encode``
            with this encoding.
        default_namespace: Namespace URI to treat as the default
            (prefix-less) namespace, passed on to ``_namespaces``.
        format: ``"html"`` or ``"xhtml"``, passed on to
            ``_serialize_html``.

    Returns:
        The serialized document.
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    if encoding is None:
        return "".join(data)
    else:
        # _encode() requires the target encoding as its second argument.
        return _encode("".join(data), encoding)
 | |
| 
 | |
| 
 | |
| # --------------------------------------------------------------------
 | |
| # serialization support
 | |
| 
 | |
| def _namespaces(elem, default_namespace=None):
 | |
|     # identify namespaces used in this tree
 | |
| 
 | |
|     # maps qnames to *encoded* prefix:local names
 | |
|     qnames = {None: None}
 | |
| 
 | |
|     # maps uri:s to prefixes
 | |
|     namespaces = {}
 | |
|     if default_namespace:
 | |
|         namespaces[default_namespace] = ""
 | |
| 
 | |
|     def add_qname(qname):
 | |
|         # calculate serialized qname representation
 | |
|         try:
 | |
|             if qname[:1] == "{":
 | |
|                 uri, tag = qname[1:].split("}", 1)
 | |
|                 prefix = namespaces.get(uri)
 | |
|                 if prefix is None:
 | |
|                     prefix = _namespace_map.get(uri)
 | |
|                     if prefix is None:
 | |
|                         prefix = "ns%d" % len(namespaces)
 | |
|                     if prefix != "xml":
 | |
|                         namespaces[uri] = prefix
 | |
|                 if prefix:
 | |
|                     qnames[qname] = "%s:%s" % (prefix, tag)
 | |
|                 else:
 | |
|                     qnames[qname] = tag # default element
 | |
|             else:
 | |
|                 if default_namespace:
 | |
|                     raise ValueError(
 | |
|                         "cannot use non-qualified names with "
 | |
|                         "default_namespace option"
 | |
|                         )
 | |
|                 qnames[qname] = qname
 | |
|         except TypeError:
 | |
|             _raise_serialization_error(qname)
 | |
| 
 | |
|     # populate qname and namespaces table
 | |
|     try:
 | |
|         iterate = elem.iter
 | |
|     except AttributeError:
 | |
|         iterate = elem.getiterator # cET compatibility
 | |
|     for elem in iterate():
 | |
|         tag = elem.tag
 | |
|         if isinstance(tag, QName) and tag.text not in qnames:
 | |
|             add_qname(tag.text)
 | |
|         elif isinstance(tag, util.string_type):
 | |
|             if tag not in qnames:
 | |
|                 add_qname(tag)
 | |
|         elif tag is not None and tag is not Comment and tag is not PI:
 | |
|             _raise_serialization_error(tag)
 | |
|         for key, value in elem.items():
 | |
|             if isinstance(key, QName):
 | |
|                 key = key.text
 | |
|             if key not in qnames:
 | |
|                 add_qname(key)
 | |
|             if isinstance(value, QName) and value.text not in qnames:
 | |
|                 add_qname(value.text)
 | |
|         text = elem.text
 | |
|         if isinstance(text, QName) and text.text not in qnames:
 | |
|             add_qname(text.text)
 | |
|     return qnames, namespaces
 | |
| 
 | |
def to_html_string(element):
    """Serialize *element* and its subtree using the ``"html"`` format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
 | |
| 
 | |
def to_xhtml_string(element):
    """Serialize *element* and its subtree using the ``"xhtml"`` format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")
 |