mitmproxy/libmproxy/console/contentview.py

332 lines
8.5 KiB
Python
Raw Normal View History

import re, cStringIO
import urwid
from PIL import Image
from PIL.ExifTags import TAGS
import lxml.html, lxml.etree
import common
from .. import utils, encoding, flow
from ..contrib import jsbeautifier
VIEW_CUTOFF = 1024*50
VIEW_AUTO = 0
VIEW_JSON = 1
VIEW_XML = 2
VIEW_URLENCODED = 3
VIEW_MULTIPART = 4
VIEW_JAVASCRIPT = 5
VIEW_IMAGE = 6
VIEW_RAW = 7
VIEW_HEX = 8
VIEW_HTML = 9
VIEW_NAMES = {
VIEW_AUTO: "Auto",
VIEW_JSON: "JSON",
VIEW_XML: "XML",
VIEW_URLENCODED: "URL-encoded",
VIEW_MULTIPART: "Multipart Form",
VIEW_JAVASCRIPT: "JavaScript",
VIEW_IMAGE: "Image",
VIEW_RAW: "Raw",
VIEW_HEX: "Hex",
VIEW_HTML: "HTML",
}
VIEW_PROMPT = (
("auto detect", "a"),
("hex", "e"),
("html", "h"),
("image", "i"),
("javascript", "j"),
("json", "s"),
("raw", "r"),
("multipart", "m"),
("urlencoded", "u"),
("xml", "x"),
)
VIEW_SHORTCUTS = {
"a": VIEW_AUTO,
"x": VIEW_XML,
"h": VIEW_HTML,
"i": VIEW_IMAGE,
"j": VIEW_JAVASCRIPT,
"s": VIEW_JSON,
"u": VIEW_URLENCODED,
"m": VIEW_MULTIPART,
"r": VIEW_RAW,
"e": VIEW_HEX,
}
CONTENT_TYPES_MAP = {
"text/html": VIEW_HTML,
"application/json": VIEW_JSON,
"text/xml": VIEW_XML,
"multipart/form-data": VIEW_MULTIPART,
"application/x-www-form-urlencoded": VIEW_URLENCODED,
"application/x-javascript": VIEW_JAVASCRIPT,
"application/javascript": VIEW_JAVASCRIPT,
"text/javascript": VIEW_JAVASCRIPT,
"image/png": VIEW_IMAGE,
"image/jpeg": VIEW_IMAGE,
"image/gif": VIEW_IMAGE,
"image/vnd.microsoft.icon": VIEW_IMAGE,
"image/x-icon": VIEW_IMAGE,
}
def trailer(clen, txt, limit):
rem = clen - limit
if rem > 0:
txt.append(urwid.Text(""))
txt.append(
urwid.Text(
[
("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)),
("key", "f"),
("highlight", " to load all data.")
]
)
)
def _view_text(content, total, limit):
"""
Generates a body for a chunk of text.
"""
txt = []
for i in utils.cleanBin(content).splitlines():
txt.append(
urwid.Text(("text", i), wrap="any")
)
trailer(total, txt, limit)
return txt
def view_raw(hdrs, content, limit):
txt = _view_text(content[:limit], len(content), limit)
return "Raw", txt
def view_hex(hdrs, content, limit):
txt = []
for offset, hexa, s in utils.hexdump(content[:limit]):
txt.append(urwid.Text([
("offset", offset),
" ",
("text", hexa),
" ",
("text", s),
]))
trailer(len(content), txt, limit)
return "Hex", txt
def view_xml(hdrs, content, limit):
parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False)
try:
document = lxml.etree.fromstring(content, parser)
2012-04-08 23:42:56 +00:00
except lxml.etree.XMLSyntaxError:
return None
docinfo = document.getroottree().docinfo
prev = []
p = document.getroottree().getroot().getprevious()
while p is not None:
prev.insert(
0,
lxml.etree.tostring(p)
)
p = p.getprevious()
2012-04-07 10:15:31 +00:00
doctype=docinfo.doctype
if prev:
doctype += "\n".join(prev).strip()
doctype = doctype.strip()
s = lxml.etree.tostring(
document,
pretty_print=True,
xml_declaration=True,
2012-04-07 10:15:31 +00:00
doctype=doctype or None,
encoding = docinfo.encoding
)
txt = []
for i in s[:limit].strip().split("\n"):
txt.append(
urwid.Text(("text", i)),
)
trailer(len(content), txt, limit)
return "XML-like data", txt
def view_html(hdrs, content, limit):
if utils.isXML(content):
parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
d = lxml.html.fromstring(content, parser=parser)
docinfo = d.getroottree().docinfo
s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype)
return "HTML", _view_text(s[:limit], len(s), limit)
def view_json(hdrs, content, limit):
lines = utils.pretty_json(content)
if lines:
txt = []
sofar = 0
for i in lines:
sofar += len(i)
txt.append(
urwid.Text(("text", i)),
)
if sofar > limit:
break
trailer(sum(len(i) for i in lines), txt, limit)
return "JSON", txt
def view_multipart(hdrs, content, limit):
v = hdrs.get("content-type")
if v:
v = utils.parse_content_type(v[0])
if not v:
return
boundary = v[2].get("boundary")
if not boundary:
return
rx = re.compile(r'\bname="([^"]+)"')
keys = []
vals = []
for i in content.split("--" + boundary):
parts = i.splitlines()
if len(parts) > 1 and parts[0][0:2] != "--":
match = rx.search(parts[1])
if match:
keys.append(match.group(1) + ":")
vals.append(utils.cleanBin(
"\n".join(parts[3+parts[2:].index(""):])
))
r = [
urwid.Text(("highlight", "Form data:\n")),
]
r.extend(common.format_keyvals(
zip(keys, vals),
key = "header",
val = "text"
))
return "Multipart form", r
def view_urlencoded(hdrs, content, limit):
lines = utils.urldecode(content)
if lines:
body = common.format_keyvals(
[(k+":", v) for (k, v) in lines],
key = "header",
val = "text"
)
return "URLEncoded form", body
def view_javascript(hdrs, content, limit):
opts = jsbeautifier.default_options()
opts.indent_size = 2
try:
res = jsbeautifier.beautify(content[:limit], opts)
# begin nocover
except:
# Bugs in jsbeautifier mean that it can trhow arbitrary errors.
return None
# end nocover
return "JavaScript", _view_text(res, len(content), limit)
def view_image(hdrs, content, limit):
try:
img = Image.open(cStringIO.StringIO(content))
except IOError:
return None
parts = [
("Format", str(img.format_description)),
("Size", "%s x %s px"%img.size),
("Mode", str(img.mode)),
]
for i in sorted(img.info.keys()):
if i != "exif":
parts.append(
(str(i), str(img.info[i]))
)
if hasattr(img, "_getexif"):
ex = img._getexif()
if ex:
for i in sorted(ex.keys()):
tag = TAGS.get(i, i)
parts.append(
(str(tag), str(ex[i]))
)
clean = []
for i in parts:
clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])])
fmt = common.format_keyvals(
clean,
key = "header",
val = "text"
)
return "%s image"%img.format, fmt
PRETTY_FUNCTION_MAP = {
VIEW_XML: view_xml,
VIEW_HTML: view_html,
VIEW_JSON: view_json,
VIEW_URLENCODED: view_urlencoded,
VIEW_MULTIPART: view_multipart,
VIEW_JAVASCRIPT: view_javascript,
VIEW_IMAGE: view_image,
VIEW_HEX: view_hex,
VIEW_RAW: view_raw,
}
def get_view_func(viewmode, hdrs, content):
"""
Returns a function object.
"""
if viewmode == VIEW_AUTO:
ctype = hdrs.get("content-type")
if ctype:
ctype = ctype[0]
ct = utils.parse_content_type(ctype) if ctype else None
if ct:
viewmode = CONTENT_TYPES_MAP.get("%s/%s"%(ct[0], ct[1]))
if not viewmode and utils.isXML(content):
viewmode = VIEW_XML
return PRETTY_FUNCTION_MAP.get(viewmode, view_raw)
def get_content_view(viewmode, hdrItems, content, limit):
"""
Returns a (msg, body) tuple.
"""
msg = []
hdrs = flow.ODictCaseless([list(i) for i in hdrItems])
enc = hdrs.get("content-encoding")
if enc and enc[0] != "identity":
decoded = encoding.decode(enc[0], content)
if decoded:
content = decoded
msg.append("[decoded %s]"%enc[0])
func = get_view_func(viewmode, hdrs, content)
ret = func(hdrs, content, limit)
if not ret:
viewmode = VIEW_RAW
ret = view_raw(hdrs, content, limit)
msg.append("Couldn't parse: falling back to Raw")
else:
msg.append(ret[0])
return " ".join(msg), ret[1]