From 293b79af9120c3fc056db60492f88e21a5610ab6 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 10 Dec 2016 10:19:05 +0100 Subject: [PATCH] remove lxml-dependent code --- .appveyor.yml | 2 +- examples/simple/modify_body_inject_iframe.py | 2 +- mitmproxy/contentviews/__init__.py | 6 +-- mitmproxy/contentviews/html.py | 42 ----------------- mitmproxy/contentviews/html_outline.py | 17 +++++++ mitmproxy/contentviews/xml.py | 45 ------------------- requirements.txt | 1 - setup.py | 1 - test/mitmproxy/contentviews/test_html.py | 18 -------- .../contentviews/test_html_outline.py | 9 ++++ test/mitmproxy/contentviews/test_xml.py | 17 ------- 11 files changed, 30 insertions(+), 130 deletions(-) delete mode 100644 mitmproxy/contentviews/html.py create mode 100644 mitmproxy/contentviews/html_outline.py delete mode 100644 mitmproxy/contentviews/xml.py delete mode 100644 test/mitmproxy/contentviews/test_html.py create mode 100644 test/mitmproxy/contentviews/test_html_outline.py delete mode 100644 test/mitmproxy/contentviews/test_xml.py diff --git a/.appveyor.yml b/.appveyor.yml index 5421eb5a9..5cf194a9a 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -27,7 +27,7 @@ test_script: - ps: | $Env:VERSION = $(python mitmproxy/version.py) $Env:SKIP_MITMPROXY = "python -c `"print('skip mitmproxy')`"" - tox -e wheel -- https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl + tox -e wheel tox -e rtool -- bdist deploy_script: diff --git a/examples/simple/modify_body_inject_iframe.py b/examples/simple/modify_body_inject_iframe.py index 33d18bbd2..7f9cc9583 100644 --- a/examples/simple/modify_body_inject_iframe.py +++ b/examples/simple/modify_body_inject_iframe.py @@ -11,7 +11,7 @@ class Injector: def response(self, flow): if flow.request.host in self.iframe_url: return - html = BeautifulSoup(flow.response.content, "lxml") + html = BeautifulSoup(flow.response.content) if html.body: iframe = html.new_tag( "iframe", diff --git a/mitmproxy/contentviews/__init__.py b/mitmproxy/contentviews/__init__.py index b83e7aa6f..3857d5e59 100644 --- a/mitmproxy/contentviews/__init__.py +++ b/mitmproxy/contentviews/__init__.py @@ -22,7 +22,7 @@ from mitmproxy import exceptions from mitmproxy.net import http from mitmproxy.utils import strutils from . import ( - auto, raw, hex, json, xml, wbxml, html, javascript, css, + auto, raw, hex, json, html_outline, wbxml, javascript, css, urlencoded, multipart, image, query, protobuf ) from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict @@ -163,10 +163,8 @@ add(auto.ViewAuto()) add(raw.ViewRaw()) add(hex.ViewHex()) add(json.ViewJSON()) -add(xml.ViewXML()) add(wbxml.ViewWBXML()) -add(html.ViewHTML()) -add(html.ViewHTMLOutline()) +add(html_outline.ViewHTMLOutline()) add(javascript.ViewJavaScript()) add(css.ViewCSS()) add(urlencoded.ViewURLEncoded()) diff --git a/mitmproxy/contentviews/html.py b/mitmproxy/contentviews/html.py deleted file mode 100644 index c625beef5..000000000 --- a/mitmproxy/contentviews/html.py +++ /dev/null @@ -1,42 +0,0 @@ -import html2text -import lxml.etree -import lxml.html - -from mitmproxy.contentviews.base import View, format_text -from mitmproxy.utils import strutils - - -class ViewHTML(View): - name = "HTML" - prompt = ("html", "h") - content_types = ["text/html"] - - def __call__(self, data, **metadata): - if strutils.is_xml(data): - parser = lxml.etree.HTMLParser( - strip_cdata=True, - remove_blank_text=True - ) - d = lxml.html.fromstring(data, parser=parser) - docinfo = d.getroottree().docinfo - s = lxml.etree.tostring( - d, - pretty_print=True, - doctype=docinfo.doctype, - encoding='utf8' - ) - return "HTML", format_text(s) - - -class ViewHTMLOutline(View): - name = "HTML Outline" - prompt = ("html outline", "o") - content_types = ["text/html"] - - def __call__(self, data, **metadata): - data = data.decode("utf-8", "replace") - h = html2text.HTML2Text(baseurl="") - h.ignore_images = True - h.body_width = 0 - outline = h.handle(data) - return "HTML Outline", format_text(outline) diff --git a/mitmproxy/contentviews/html_outline.py b/mitmproxy/contentviews/html_outline.py new file mode 100644 index 000000000..d6c51b294 --- /dev/null +++ b/mitmproxy/contentviews/html_outline.py @@ -0,0 +1,17 @@ +import html2text + +from mitmproxy.contentviews import base + + +class ViewHTMLOutline(base.View): + name = "HTML Outline" + prompt = ("html outline", "o") + content_types = ["text/html"] + + def __call__(self, data, **metadata): + data = data.decode("utf-8", "replace") + h = html2text.HTML2Text(baseurl="") + h.ignore_images = True + h.body_width = 0 + outline = h.handle(data) + return "HTML Outline", base.format_text(outline) diff --git a/mitmproxy/contentviews/xml.py b/mitmproxy/contentviews/xml.py deleted file mode 100644 index a382b09d4..000000000 --- a/mitmproxy/contentviews/xml.py +++ /dev/null @@ -1,45 +0,0 @@ -import lxml.etree - -from . import base - - -class ViewXML(base.View): - name = "XML" - prompt = ("xml", "x") - content_types = ["text/xml"] - - def __call__(self, data, **metadata): - parser = lxml.etree.XMLParser( - remove_blank_text=True, - resolve_entities=False, - strip_cdata=False, - recover=False - ) - try: - document = lxml.etree.fromstring(data, parser) - except lxml.etree.XMLSyntaxError: - return None - docinfo = document.getroottree().docinfo - - prev = [] - p = document.getroottree().getroot().getprevious() - while p is not None: - prev.insert( - 0, - lxml.etree.tostring(p) - ) - p = p.getprevious() - doctype = docinfo.doctype - if prev: - doctype += "\n".join(p.decode() for p in prev).strip() - doctype = doctype.strip() - - s = lxml.etree.tostring( - document, - pretty_print=True, - xml_declaration=True, - doctype=doctype or None, - encoding=docinfo.encoding - ) - - return "XML-like data", base.format_text(s) diff --git a/requirements.txt b/requirements.txt index 67a02a979..ab8e8a0be 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl; sys_platform == 'win32' and python_version == '3.5' -e .[dev,examples,contentviews] diff --git a/setup.py b/setup.py index 927fbc5e1..4ef89e20e 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,6 @@ setup( "html2text>=2016.1.8, <=2016.9.19", "hyperframe>=4.0.1, <5", "jsbeautifier>=1.6.3, <1.7", - "lxml>=3.5.0, <=3.6.0", # no wheels for 3.6.1 yet. "Pillow>=3.2, <3.5", "passlib>=1.6.5, <1.8", "pyasn1>=0.1.9, <0.2", diff --git a/test/mitmproxy/contentviews/test_html.py b/test/mitmproxy/contentviews/test_html.py deleted file mode 100644 index 8d5818e5e..000000000 --- a/test/mitmproxy/contentviews/test_html.py +++ /dev/null @@ -1,18 +0,0 @@ -from mitmproxy.contentviews import html -from . import full_eval - - -def test_view_html(): - v = full_eval(html.ViewHTML()) - s = b"


one

" - assert v(s) - - s = b"gobbledygook" - assert not v(s) - - -def test_view_html_outline(): - v = full_eval(html.ViewHTMLOutline()) - s = b"


one

" - assert v(s) - assert v(b'\xfe') diff --git a/test/mitmproxy/contentviews/test_html_outline.py b/test/mitmproxy/contentviews/test_html_outline.py new file mode 100644 index 000000000..d9ccc4068 --- /dev/null +++ b/test/mitmproxy/contentviews/test_html_outline.py @@ -0,0 +1,9 @@ +from mitmproxy.contentviews import html_outline +from test.mitmproxy.contentviews import full_eval + + +def test_view_html_outline(): + v = full_eval(html_outline.ViewHTMLOutline()) + s = b"


one

" + assert v(s) + assert v(b'\xfe') \ No newline at end of file diff --git a/test/mitmproxy/contentviews/test_xml.py b/test/mitmproxy/contentviews/test_xml.py deleted file mode 100644 index 680134cb4..000000000 --- a/test/mitmproxy/contentviews/test_xml.py +++ /dev/null @@ -1,17 +0,0 @@ -from mitmproxy.contentviews import xml -from . import full_eval - - -def test_view_xml(): - v = full_eval(xml.ViewXML()) - assert v(b"") - assert not v(b"") - s = b""" - - - - """ - assert v(s)