# coding: utf8
"""Sublime Text commands that encode or decode the selected text in place.

Every command subclasses ``StringEncode`` and supplies an ``encode`` method;
the shared ``run`` method applies it to each non-empty selection.
"""
import base64
import json
import re
import sys
import urllib
import urllib.parse
from html.entities import codepoint2name

import sublime_plugin


class StringEncode(sublime_plugin.TextCommand):
    """Base command: replace every non-empty selection with ``encode(text)``."""

    def run(self, edit):
        """Apply ``self.encode`` to each non-empty selected region.

        Args:
            edit: the edit token Sublime Text passes to text commands.
        """
        for region in self.view.sel():
            if region.empty():
                continue
            text = self.view.substr(region)
            replacement = self.encode(text)
            self.view.replace(edit, region, replacement)


# NOTE(review): in the reviewed copy every entity literal in the two tables
# below had been decoded (each value was identical to its key, and
# u"\"" mapped to an unterminated string), so the tables could not work as
# written.  The named entities are rebuilt from the standard HTML 4 table; the
# apostrophe's reference could not be read back and "&#039;" is assumed --
# confirm against version control.

# HTML 4 entity names that are not keys of html_escape_table: "&" is handled
# separately by each command, and the remaining (invisible) characters were
# not listed in the original table.
_UNLISTED_ENTITY_NAMES = frozenset((
    'amp', 'shy', 'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm', 'rlm',
))

# Maps a character to the entity that replaces it.
html_escape_table = {
    u"\"": "&quot;",
    u"'": "&#039;",
    u"<": "&lt;",
    u">": "&gt;",
}
html_escape_table.update(
    (chr(codepoint), '&%s;' % name)
    for codepoint, name in codepoint2name.items()
    if name not in _UNLISTED_ENTITY_NAMES
)

xml_escape_table = {
    u"\"": "&quot;",
    u"'": "&#039;",
    u"<": "&lt;",
    u">": "&gt;",
}

# Characters the "safe" HTML commands leave untouched.
html_reserved_list = (u"\"", u"'", u"<", u">", u"&")

# One entity-like token: a hexadecimal reference (group 1 holds the digits),
# the literal apostrophe reference used by the tables, or a named entity.
_ENTITY_RE = re.compile(
    r'&(?:#[xX]([0-9a-fA-F]+)|#039|[A-Za-z][A-Za-z0-9]*);')


def _entitize(text, char_to_entity):
    """Return *text* with mapped characters and all non-ASCII escaped.

    Characters found in *char_to_entity* become the mapped entity; any other
    character above code point 127 becomes a lowercase ``&#x...;`` reference.
    """
    pieces = []
    for char in text:
        if char in char_to_entity:
            pieces.append(char_to_entity[char])
        elif ord(char) > 127:
            pieces.append('&#x%x;' % ord(char))
        else:
            pieces.append(char)
    return ''.join(pieces)


def _deentitize(text, entity_to_char):
    """Return *text* with known entities and hex references decoded.

    The text is scanned once, so the output of one substitution is never
    decoded a second time ("&amp;lt;" becomes "&lt;", not "<").  Unknown
    names and out-of-range hex references are left as they are.
    """
    def expand(match):
        hex_digits = match.group(1)
        if hex_digits is None:
            return entity_to_char.get(match.group(0), match.group(0))
        try:
            return chr(int(hex_digits, 16))
        except (ValueError, OverflowError):
            # Not a valid code point; keep the reference verbatim.
            return match.group(0)

    return _ENTITY_RE.sub(expand, text)


def _with_ampersand(table):
    """Return a copy of *table* that also escapes "&" as "&amp;"."""
    extended = dict(table)
    extended[u'&'] = '&amp;'
    return extended


def _without_reserved(table):
    """Return a copy of *table* without the html_reserved_list characters."""
    return dict(
        (char, entity) for char, entity in table.items()
        if char not in html_reserved_list
    )


def _decoder(table):
    """Return the entity -> character mapping for *table*, plus "&amp;"."""
    inverse = dict((entity, char) for char, entity in table.items())
    inverse['&amp;'] = u'&'
    return inverse


class HtmlEntitizeCommand(StringEncode):
    """Escape "&", the HTML table characters and all non-ASCII text."""

    def encode(self, text):
        return _entitize(text, _with_ampersand(html_escape_table))


class HtmlDeentitizeCommand(StringEncode):
    """Decode HTML table entities, hex references and "&amp;"."""

    def encode(self, text):
        return _deentitize(text, _decoder(html_escape_table))


class SafeHtmlEntitizeCommand(StringEncode):
    """Like HtmlEntitizeCommand, but leave the reserved characters alone."""

    def encode(self, text):
        return _entitize(text, _without_reserved(html_escape_table))


class SafeHtmlDeentitizeCommand(StringEncode):
    """Like HtmlDeentitizeCommand, but keep the reserved table entities.

    As in the original, hex references and "&amp;" are still decoded.
    """

    def encode(self, text):
        return _deentitize(
            text, _decoder(_without_reserved(html_escape_table)))


class XmlEntitizeCommand(StringEncode):
    """Escape "&", the XML table characters and all non-ASCII text."""

    def encode(self, text):
        return _entitize(text, _with_ampersand(xml_escape_table))


class XmlDeentitizeCommand(StringEncode):
    """Decode XML table entities, hex references and "&amp;".

    Hex references are decoded so that the output of XmlEntitizeCommand
    round-trips; the original left them untouched.
    """

    def encode(self, text):
        return _deentitize(text, _decoder(xml_escape_table))


class JsonEscapeCommand(StringEncode):
    """Turn the selection into a JSON string literal (quotes included)."""

    def encode(self, text):
        return json.dumps(text)


class JsonUnescapeCommand(StringEncode):
    """Turn a JSON string literal back into the text it denotes.

    Raises:
        ValueError: if the selection is not valid JSON.
    """

    def encode(self, text):
        value = json.loads(text)
        # A selection such as ``123`` or ``[1]`` parses to a non-string, which
        # cannot be written back to the view; leave such text unchanged.
        return value if isinstance(value, str) else text


class UrlEncodeCommand(StringEncode):
    """Percent-encode the selection."""

    def encode(self, text):
        return urllib.parse.quote(text)


class UrlDecodeCommand(StringEncode):
    """Decode percent-escapes in the selection."""

    def encode(self, text):
        return urllib.parse.unquote(text)


class Base64EncodeCommand(StringEncode):
    """Base64-encode the UTF-8 bytes of the selection."""

    def encode(self, text):
        # b64encode works on bytes and returns bytes; the view needs str.
        return base64.b64encode(text.encode('utf-8')).decode('ascii')


class Base64DecodeCommand(StringEncode):
    """Decode Base64 text whose payload is UTF-8.

    Raises:
        ValueError: if the selection is not valid Base64 (binascii.Error) or
            the decoded bytes are not valid UTF-8 (UnicodeDecodeError).
    """

    def encode(self, text):
        return base64.b64decode(text).decode('utf-8')


# NOTE(review): in the reviewed copy the text between ``re.sub(r'(?`` in
# Escaper.encode and ``= 0xd800 and tmp <= 0xdbff:`` inside the
# unicode-to-hex encoder was missing.  Escaper.encode and the head of that
# encoder are reconstructed below from the surviving fragments.  Any other
# definitions that lived in the lost span (for example the concrete Escaper
# subclasses) are NOT recreated here and must be restored from version
# control before this file replaces the original.


class Escaper(StringEncode):
    """Backslash-escape metacharacters that are not already escaped.

    NOTE(review): reconstructed.  Assumes concrete subclasses provide a
    ``meta`` attribute holding a regular-expression character class --
    confirm against version control.
    """

    def encode(self, text):
        return re.sub(r'(?<!\\)(%s)' % self.meta, r'\\\1', text)


class UnicodeHexCommand(StringEncode):
    r"""Replace characters with ``\uXXXX`` / ``\UXXXXXXXX`` escapes.

    Code points up to U+FFFF become ``\u`` plus four lowercase hex digits;
    higher code points become ``\U`` plus eight.

    NOTE(review): the class name and the treatment of ASCII characters
    (passed through unchanged here) come from the lost span and are
    assumptions -- confirm against version control.
    """

    def encode(self, text):
        pieces = []
        for char in text:
            codepoint = ord(char)
            if codepoint < 0x80:
                pieces.append(char)
            elif codepoint > 0xffff:
                pieces.append('\\U{0:08x}'.format(codepoint))
            else:
                pieces.append('\\u{0:04x}'.format(codepoint))
        return ''.join(pieces)


# A surrogate pair written as two \u escapes (groups 1 and 2), a single \u
# escape (group 3), or a \U escape (group 4).  The pair must come first so a
# high surrogate is combined with the low surrogate that follows it.
_HEX_ESCAPE_RE = re.compile(
    r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
    r'|\\u([0-9a-fA-F]{4})'
    r'|\\U([0-9a-fA-F]{8})'
)


class HexUnicodeCommand(StringEncode):
    r"""Replace ``\uXXXX`` and ``\UXXXXXXXX`` escapes with their characters.

    The text is scanned once, so decoded output is never re-interpreted as
    another escape.  Escapes that do not denote a character (an unpaired
    surrogate, or a value above U+10FFFF) are left unchanged instead of
    raising.
    """

    def encode(self, text):
        def expand(match):
            high, low, single, wide = match.groups()
            if high is not None:
                # Combine the UTF-16 surrogate pair into one code point.
                codepoint = (
                    0x10000
                    + ((int(high, 16) - 0xd800) << 10)
                    + (int(low, 16) - 0xdc00)
                )
            else:
                codepoint = int(single if single is not None else wide, 16)
                if codepoint > 0x10ffff or 0xd800 <= codepoint <= 0xdfff:
                    return match.group(0)
            return chr(codepoint)

        return _HEX_ESCAPE_RE.sub(expand, text)