2015-11-13 21:55:27 +00:00
|
|
|
import string
|
|
|
|
import lxml.html
|
|
|
|
import lxml.etree
|
2016-06-02 01:03:37 +00:00
|
|
|
from mitmproxy import contentviews
|
|
|
|
from netlib import strutils
|
2015-11-13 21:55:27 +00:00
|
|
|
|
|
|
|
|
2015-11-14 02:46:34 +00:00
|
|
|
def _piglify_word(word):
    """Return *word* in pig latin, keeping surrounding punctuation in place.

    Leading and trailing ``string.punctuation`` characters are set aside,
    the remaining core is transformed, and the punctuation is put back
    around it.  A core starting with a vowel gets ``"hay"`` appended;
    otherwise its first character is moved to the end, followed by ``"ay"``.
    A token consisting only of punctuation is returned unchanged.
    """
    without_prefix = word.lstrip(string.punctuation)
    core = without_prefix.rstrip(string.punctuation)
    if not core:
        # Nothing but punctuation (e.g. "!" or "..."): there is no letter to
        # move, so do not append a suffix.
        return word
    prefix = word[:len(word) - len(without_prefix)]
    suffix = without_prefix[len(core):]
    if core[0].lower() in "aeiou":
        return prefix + core + "hay" + suffix
    return prefix + core[1:] + core[0] + "ay" + suffix


def _piglify(src):
    """Translate every whitespace-separated word of *src* into pig latin.

    Runs of whitespace between words collapse to a single space.  If *src*
    starts or ends with whitespace, one space is kept at that edge so that
    words in neighbouring text/tail nodes are not glued together once the
    tree is serialized again.
    """
    words = src.split()
    if not words:
        # Whitespace-only (or empty) text: keep a single separator if there
        # was any whitespace at all.
        return " " if src else ""
    lead = " " if src[0].isspace() else ""
    trail = " " if src[-1].isspace() else ""
    return lead + " ".join(_piglify_word(word) for word in words) + trail


class ViewPigLatin(contentviews.View):
    """Content view that renders the text of an HTML body in pig latin."""

    name = "pig_latin_HTML"
    prompt = ("pig latin HTML", "l")
    content_types = ["text/html"]

    def __call__(self, data, **metadata):
        """Return a pig latin rendering of *data*.

        data: the raw body to display; it is only processed when
            ``strutils.is_xml(data)`` is true.
        metadata: extra keyword arguments; unused here.

        Returns the tuple ``("HTML", contentviews.format_text(serialized))``
        where ``serialized`` is the pretty-printed, translated document, or
        ``None`` when *data* does not pass the ``is_xml`` check.
        NOTE(review): returning None presumably lets the caller fall back to
        another view -- confirm against the contentviews API.
        """
        if not strutils.is_xml(data):
            return None

        parser = lxml.etree.HTMLParser(
            strip_cdata=True,
            remove_blank_text=True
        )
        doc = lxml.html.fromstring(data, parser=parser)
        # Capture the doctype before touching the tree so it can be
        # re-emitted by tostring() below.
        docinfo = doc.getroottree().docinfo

        # iter() walks the subtree in document order without Python-level
        # recursion, so deeply nested markup cannot exhaust the call stack.
        for node in doc.iter():
            text = getattr(node, 'text', None)
            if text:
                node.text = _piglify(text)
            tail = getattr(node, 'tail', None)
            if tail:
                node.tail = _piglify(tail)

        serialized = lxml.etree.tostring(
            doc,
            pretty_print=True,
            doctype=docinfo.doctype
        )
        return "HTML", contentviews.format_text(serialized)
|
2015-11-13 21:55:27 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Single shared view instance: start() adds this object to the context and
# done() removes the very same object again.
pig_view = ViewPigLatin()
|
|
|
|
|
|
|
|
|
2016-06-14 01:17:09 +00:00
|
|
|
def start(context):
    """Add the module-level ``pig_view`` to *context* via ``add_contentview``.

    NOTE(review): presumably invoked by mitmproxy when the script is
    loaded -- confirm against the script hook API in use.
    """
    context.add_contentview(pig_view)
|
|
|
|
|
|
|
|
|
2016-03-10 20:17:05 +00:00
|
|
|
def done(context):
    """Remove the module-level ``pig_view`` from *context* again.

    Counterpart of ``start``: calls ``context.remove_contentview`` with the
    same view object that ``start`` added.
    NOTE(review): presumably invoked by mitmproxy when the script is
    unloaded -- confirm against the script hook API in use.
    """
    context.remove_contentview(pig_view)
|