remove lxml-dependent code

This commit is contained in:
Maximilian Hils 2016-12-10 10:19:05 +01:00
parent a7ba2f7b46
commit 293b79af91
11 changed files with 30 additions and 130 deletions

View File

@ -27,7 +27,7 @@ test_script:
- ps: |
$Env:VERSION = $(python mitmproxy/version.py)
$Env:SKIP_MITMPROXY = "python -c `"print('skip mitmproxy')`""
tox -e wheel -- https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl
tox -e wheel
tox -e rtool -- bdist
deploy_script:

View File

@ -11,7 +11,7 @@ class Injector:
def response(self, flow):
if flow.request.host in self.iframe_url:
return
html = BeautifulSoup(flow.response.content, "lxml")
html = BeautifulSoup(flow.response.content)
if html.body:
iframe = html.new_tag(
"iframe",

View File

@ -22,7 +22,7 @@ from mitmproxy import exceptions
from mitmproxy.net import http
from mitmproxy.utils import strutils
from . import (
auto, raw, hex, json, xml, wbxml, html, javascript, css,
auto, raw, hex, json, html_outline, wbxml, javascript, css,
urlencoded, multipart, image, query, protobuf
)
from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict
@ -163,10 +163,8 @@ add(auto.ViewAuto())
add(raw.ViewRaw())
add(hex.ViewHex())
add(json.ViewJSON())
add(xml.ViewXML())
add(wbxml.ViewWBXML())
add(html.ViewHTML())
add(html.ViewHTMLOutline())
add(html_outline.ViewHTMLOutline())
add(javascript.ViewJavaScript())
add(css.ViewCSS())
add(urlencoded.ViewURLEncoded())

View File

@ -1,42 +0,0 @@
import html2text
import lxml.etree
import lxml.html
from mitmproxy.contentviews.base import View, format_text
from mitmproxy.utils import strutils
class ViewHTML(View):
    """Content view that pretty-prints HTML bodies with lxml."""

    name = "HTML"
    prompt = ("html", "h")
    content_types = ["text/html"]

    def __call__(self, data, **metadata):
        """Return ``("HTML", lines)`` for markup-like data, otherwise ``None``.

        ``data`` is only parsed when ``strutils.is_xml`` accepts it; any
        other input falls through and yields ``None``.
        """
        if not strutils.is_xml(data):
            return None

        html_parser = lxml.etree.HTMLParser(
            strip_cdata=True,
            remove_blank_text=True,
        )
        root = lxml.html.fromstring(data, parser=html_parser)
        # Carry the parsed document's doctype over into the serialized output.
        doctype = root.getroottree().docinfo.doctype
        pretty = lxml.etree.tostring(
            root,
            pretty_print=True,
            doctype=doctype,
            encoding='utf8',
        )
        return "HTML", format_text(pretty)
class ViewHTMLOutline(View):
    """Content view that converts HTML to text with html2text."""

    name = "HTML Outline"
    prompt = ("html outline", "o")
    content_types = ["text/html"]

    def __call__(self, data, **metadata):
        """Return ``("HTML Outline", lines)`` for the given bytes.

        The input is decoded as UTF-8 with the "replace" error handler, so
        undecodable bytes do not raise.
        """
        markup = data.decode("utf-8", "replace")
        converter = html2text.HTML2Text(baseurl="")
        converter.body_width = 0
        converter.ignore_images = True
        return "HTML Outline", format_text(converter.handle(markup))

View File

@ -0,0 +1,17 @@
import html2text
from mitmproxy.contentviews import base
class ViewHTMLOutline(base.View):
    """Render an HTML body as text using html2text."""

    name = "HTML Outline"
    prompt = ("html outline", "o")
    content_types = ["text/html"]

    def __call__(self, data, **metadata):
        """Decode ``data`` and return ``("HTML Outline", lines)``.

        Decoding uses UTF-8 with ``errors="replace"``, so invalid byte
        sequences are substituted instead of raising.
        """
        decoded = data.decode("utf-8", "replace")

        renderer = html2text.HTML2Text(baseurl="")
        renderer.ignore_images = True
        renderer.body_width = 0

        text_outline = renderer.handle(decoded)
        formatted = base.format_text(text_outline)
        return "HTML Outline", formatted

View File

@ -1,45 +0,0 @@
import lxml.etree
from . import base
class ViewXML(base.View):
    """Content view that re-serializes XML with lxml pretty-printing."""

    name = "XML"
    prompt = ("xml", "x")
    content_types = ["text/xml"]

    def __call__(self, data, **metadata):
        """Return ``("XML-like data", lines)``, or ``None`` on a syntax error.

        Parsing is strict (``recover=False``): input that raises
        ``lxml.etree.XMLSyntaxError`` produces ``None``.
        """
        strict_parser = lxml.etree.XMLParser(
            remove_blank_text=True,
            resolve_entities=False,
            strip_cdata=False,
            recover=False,
        )
        try:
            root = lxml.etree.fromstring(data, strict_parser)
        except lxml.etree.XMLSyntaxError:
            return None

        tree = root.getroottree()
        docinfo = tree.docinfo

        # Serialize every node that precedes the root element. The walk goes
        # backwards from the root, so reverse afterwards to restore document
        # order.
        leading = []
        node = tree.getroot().getprevious()
        while node is not None:
            leading.append(lxml.etree.tostring(node))
            node = node.getprevious()
        leading.reverse()

        # The serialized leading nodes are appended to the doctype string so
        # that they are emitted ahead of the root element.
        doctype = docinfo.doctype
        if leading:
            doctype += "\n".join(chunk.decode() for chunk in leading).strip()
        doctype = doctype.strip()

        serialized = lxml.etree.tostring(
            root,
            pretty_print=True,
            xml_declaration=True,
            doctype=doctype or None,
            encoding=docinfo.encoding,
        )
        return "XML-like data", base.format_text(serialized)

View File

@ -1,2 +1 @@
https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl; sys_platform == 'win32' and python_version == '3.5'
-e .[dev,examples,contentviews]

View File

@ -70,7 +70,6 @@ setup(
"html2text>=2016.1.8, <=2016.9.19",
"hyperframe>=4.0.1, <5",
"jsbeautifier>=1.6.3, <1.7",
"lxml>=3.5.0, <=3.6.0", # no wheels for 3.6.1 yet.
"Pillow>=3.2, <3.5",
"passlib>=1.6.5, <1.8",
"pyasn1>=0.1.9, <0.2",

View File

@ -1,18 +0,0 @@
from mitmproxy.contentviews import html
from . import full_eval
def test_view_html():
    """ViewHTML yields a truthy result for HTML and a falsy one for plain text."""
    view = full_eval(html.ViewHTML())

    markup = b"<html><br><br></br><p>one</p></html>"
    assert view(markup)

    not_markup = b"gobbledygook"
    assert not view(not_markup)
assert not v(s)
def test_view_html_outline():
v = full_eval(html.ViewHTMLOutline())
s = b"<html><br><br></br><p>one</p></html>"
assert v(s)
assert v(b'\xfe')

View File

@ -0,0 +1,9 @@
from mitmproxy.contentviews import html_outline
from test.mitmproxy.contentviews import full_eval
def test_view_html_outline():
    """The outline view returns something truthy for HTML and for invalid UTF-8."""
    outline_view = full_eval(html_outline.ViewHTMLOutline())

    sample = b"<html><br><br></br><p>one</p></html>"
    assert outline_view(sample)

    # A lone 0xfe byte is not decodable as UTF-8; the view must still
    # produce output instead of raising.
    undecodable = b'\xfe'
    assert outline_view(undecodable)

View File

@ -1,17 +0,0 @@
from mitmproxy.contentviews import xml
from . import full_eval
def test_view_xml():
    """ViewXML accepts well-formed XML and rejects an unclosed element."""
    view = full_eval(xml.ViewXML())

    assert view(b"<foo></foo>")
    assert not view(b"<foo>")

    # Document with an XML declaration and a processing instruction ahead
    # of the root element.
    rss_document = b"""<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet title="XSL_formatting"?>
<rss
xmlns:media="http://search.yahoo.com/mrss/"
xmlns:atom="http://www.w3.org/2005/Atom"
version="2.0">
</rss>
"""
    assert view(rss_document)