remove lxml-dependent code

2025-01-30 14:58:38 +00:00 · 2016-12-10 10:19:05 +01:00 · 2016-12-10 10:19:05 +01:00 · 293b79af91
commit 293b79af91
parent a7ba2f7b46
11 changed files with 30 additions and 130 deletions
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -27,7 +27,7 @@ test_script:
  - ps: |
      $Env:VERSION = $(python mitmproxy/version.py)
      $Env:SKIP_MITMPROXY = "python -c `"print('skip mitmproxy')`""
-      tox -e wheel -- https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl
+      tox -e wheel
      tox -e rtool -- bdist

 deploy_script:
--- a/examples/simple/modify_body_inject_iframe.py
+++ b/examples/simple/modify_body_inject_iframe.py
@@ -11,7 +11,7 @@ class Injector:
    def response(self, flow):
        if flow.request.host in self.iframe_url:
            return
-        html = BeautifulSoup(flow.response.content, "lxml")
+        html = BeautifulSoup(flow.response.content)
        if html.body:
            iframe = html.new_tag(
                "iframe",
--- a/mitmproxy/contentviews/__init__.py
+++ b/mitmproxy/contentviews/__init__.py
@@ -22,7 +22,7 @@ from mitmproxy import exceptions
 from mitmproxy.net import http
 from mitmproxy.utils import strutils
 from . import (
-    auto, raw, hex, json, xml, wbxml, html, javascript, css,
+    auto, raw, hex, json, html_outline, wbxml, javascript, css,
    urlencoded, multipart, image, query, protobuf
 )
 from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict
@@ -163,10 +163,8 @@ add(auto.ViewAuto())
 add(raw.ViewRaw())
 add(hex.ViewHex())
 add(json.ViewJSON())
-add(xml.ViewXML())
 add(wbxml.ViewWBXML())
-add(html.ViewHTML())
-add(html.ViewHTMLOutline())
+add(html_outline.ViewHTMLOutline())
 add(javascript.ViewJavaScript())
 add(css.ViewCSS())
 add(urlencoded.ViewURLEncoded())
--- a/mitmproxy/contentviews/html.py
+++ b/mitmproxy/contentviews/html.py
@@ -1,42 +0,0 @@
-import html2text
-import lxml.etree
-import lxml.html
-
-from mitmproxy.contentviews.base import View, format_text
-from mitmproxy.utils import strutils
-
-
-class ViewHTML(View):
-    name = "HTML"
-    prompt = ("html", "h")
-    content_types = ["text/html"]
-
-    def __call__(self, data, **metadata):
-        if strutils.is_xml(data):
-            parser = lxml.etree.HTMLParser(
-                strip_cdata=True,
-                remove_blank_text=True
-            )
-            d = lxml.html.fromstring(data, parser=parser)
-            docinfo = d.getroottree().docinfo
-            s = lxml.etree.tostring(
-                d,
-                pretty_print=True,
-                doctype=docinfo.doctype,
-                encoding='utf8'
-            )
-            return "HTML", format_text(s)
-
-
-class ViewHTMLOutline(View):
-    name = "HTML Outline"
-    prompt = ("html outline", "o")
-    content_types = ["text/html"]
-
-    def __call__(self, data, **metadata):
-        data = data.decode("utf-8", "replace")
-        h = html2text.HTML2Text(baseurl="")
-        h.ignore_images = True
-        h.body_width = 0
-        outline = h.handle(data)
-        return "HTML Outline", format_text(outline)
--- a/mitmproxy/contentviews/html_outline.py
+++ b/mitmproxy/contentviews/html_outline.py
@@ -0,0 +1,17 @@
+import html2text
+
+from mitmproxy.contentviews import base
+
+
+class ViewHTMLOutline(base.View):
+    name = "HTML Outline"
+    prompt = ("html outline", "o")
+    content_types = ["text/html"]
+
+    def __call__(self, data, **metadata):
+        data = data.decode("utf-8", "replace")
+        h = html2text.HTML2Text(baseurl="")
+        h.ignore_images = True
+        h.body_width = 0
+        outline = h.handle(data)
+        return "HTML Outline", base.format_text(outline)
--- a/mitmproxy/contentviews/xml.py
+++ b/mitmproxy/contentviews/xml.py
@@ -1,45 +0,0 @@
-import lxml.etree
-
-from . import base
-
-
-class ViewXML(base.View):
-    name = "XML"
-    prompt = ("xml", "x")
-    content_types = ["text/xml"]
-
-    def __call__(self, data, **metadata):
-        parser = lxml.etree.XMLParser(
-            remove_blank_text=True,
-            resolve_entities=False,
-            strip_cdata=False,
-            recover=False
-        )
-        try:
-            document = lxml.etree.fromstring(data, parser)
-        except lxml.etree.XMLSyntaxError:
-            return None
-        docinfo = document.getroottree().docinfo
-
-        prev = []
-        p = document.getroottree().getroot().getprevious()
-        while p is not None:
-            prev.insert(
-                0,
-                lxml.etree.tostring(p)
-            )
-            p = p.getprevious()
-        doctype = docinfo.doctype
-        if prev:
-            doctype += "\n".join(p.decode() for p in prev).strip()
-        doctype = doctype.strip()
-
-        s = lxml.etree.tostring(
-            document,
-            pretty_print=True,
-            xml_declaration=True,
-            doctype=doctype or None,
-            encoding=docinfo.encoding
-        )
-
-        return "XML-like data", base.format_text(s)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
-https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl; sys_platform == 'win32' and python_version == '3.5'
 -e .[dev,examples,contentviews]
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,6 @@ setup(
        "html2text>=2016.1.8, <=2016.9.19",
        "hyperframe>=4.0.1, <5",
        "jsbeautifier>=1.6.3, <1.7",
-        "lxml>=3.5.0, <=3.6.0",  # no wheels for 3.6.1 yet.
        "Pillow>=3.2, <3.5",
        "passlib>=1.6.5, <1.8",
        "pyasn1>=0.1.9, <0.2",
--- a/test/mitmproxy/contentviews/test_html.py
+++ b/test/mitmproxy/contentviews/test_html.py
@@ -1,18 +0,0 @@
-from mitmproxy.contentviews import html
-from . import full_eval
-
-
-def test_view_html():
-    v = full_eval(html.ViewHTML())
-    s = b"<html><br><br></br><p>one</p></html>"
-    assert v(s)
-
-    s = b"gobbledygook"
-    assert not v(s)
-
-
-def test_view_html_outline():
-    v = full_eval(html.ViewHTMLOutline())
-    s = b"<html><br><br></br><p>one</p></html>"
-    assert v(s)
-    assert v(b'\xfe')
--- a/test/mitmproxy/contentviews/test_html_outline.py
+++ b/test/mitmproxy/contentviews/test_html_outline.py
@@ -0,0 +1,9 @@
+from mitmproxy.contentviews import html_outline
+from test.mitmproxy.contentviews import full_eval
+
+
+def test_view_html_outline():
+    v = full_eval(html_outline.ViewHTMLOutline())
+    s = b"<html><br><br></br><p>one</p></html>"
+    assert v(s)
+    assert v(b'\xfe')
--- a/test/mitmproxy/contentviews/test_xml.py
+++ b/test/mitmproxy/contentviews/test_xml.py
@@ -1,17 +0,0 @@
-from mitmproxy.contentviews import xml
-from . import full_eval
-
-
-def test_view_xml():
-    v = full_eval(xml.ViewXML())
-    assert v(b"<foo></foo>")
-    assert not v(b"<foo>")
-    s = b"""<?xml version="1.0" encoding="UTF-8"?>
-        <?xml-stylesheet title="XSL_formatting"?>
-        <rss
-            xmlns:media="http://search.yahoo.com/mrss/"
-            xmlns:atom="http://www.w3.org/2005/Atom"
-            version="2.0">
-        </rss>
-    """
-    assert v(s)