mitmproxy/libmproxy/console/contentview.py
Sahn Lam 3189d144a5 Optional AMF decoding support
If PyAMF is installed, enable AMF decoding.
2012-08-17 18:45:26 -07:00

371 lines
9.5 KiB
Python

import re, cStringIO, traceback
import urwid
from PIL import Image
from PIL.ExifTags import TAGS
import lxml.html, lxml.etree
import common
from .. import utils, encoding, flow
from ..contrib import jsbeautifier, html2text
VIEW_CUTOFF = 1024*50
VIEW_AUTO = 0
VIEW_JSON = 1
VIEW_XML = 2
VIEW_URLENCODED = 3
VIEW_MULTIPART = 4
VIEW_JAVASCRIPT = 5
VIEW_IMAGE = 6
VIEW_RAW = 7
VIEW_HEX = 8
VIEW_HTML = 9
VIEW_OUTLINE = 10
VIEW_AMF = 11
VIEW_NAMES = {
VIEW_AUTO: "Auto",
VIEW_JSON: "JSON",
VIEW_XML: "XML",
VIEW_URLENCODED: "URL-encoded",
VIEW_MULTIPART: "Multipart Form",
VIEW_JAVASCRIPT: "JavaScript",
VIEW_IMAGE: "Image",
VIEW_RAW: "Raw",
VIEW_HEX: "Hex",
VIEW_HTML: "HTML",
VIEW_OUTLINE: "HTML Outline",
}
VIEW_PROMPT = [
("auto detect", "a"),
("hex", "e"),
("html", "h"),
("image", "i"),
("javascript", "j"),
("html outline", "o"),
("json", "s"),
("raw", "r"),
("multipart", "m"),
("urlencoded", "u"),
("xml", "x"),
]
VIEW_SHORTCUTS = {
"a": VIEW_AUTO,
"x": VIEW_XML,
"h": VIEW_HTML,
"i": VIEW_IMAGE,
"j": VIEW_JAVASCRIPT,
"s": VIEW_JSON,
"u": VIEW_URLENCODED,
"m": VIEW_MULTIPART,
"o": VIEW_OUTLINE,
"r": VIEW_RAW,
"e": VIEW_HEX,
}
CONTENT_TYPES_MAP = {
"text/html": VIEW_HTML,
"application/json": VIEW_JSON,
"text/xml": VIEW_XML,
"multipart/form-data": VIEW_MULTIPART,
"application/x-www-form-urlencoded": VIEW_URLENCODED,
"application/x-javascript": VIEW_JAVASCRIPT,
"application/javascript": VIEW_JAVASCRIPT,
"text/javascript": VIEW_JAVASCRIPT,
"image/png": VIEW_IMAGE,
"image/jpeg": VIEW_IMAGE,
"image/gif": VIEW_IMAGE,
"image/vnd.microsoft.icon": VIEW_IMAGE,
"image/x-icon": VIEW_IMAGE,
}
def trailer(clen, txt, limit):
rem = clen - limit
if rem > 0:
txt.append(urwid.Text(""))
txt.append(
urwid.Text(
[
("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)),
("key", "f"),
("highlight", " to load all data.")
]
)
)
def _view_text(content, total, limit):
"""
Generates a body for a chunk of text.
"""
txt = []
for i in utils.cleanBin(content).splitlines():
txt.append(
urwid.Text(("text", i), wrap="any")
)
trailer(total, txt, limit)
return txt
def view_raw(hdrs, content, limit):
txt = _view_text(content[:limit], len(content), limit)
return "Raw", txt
def view_hex(hdrs, content, limit):
txt = []
for offset, hexa, s in utils.hexdump(content[:limit]):
txt.append(urwid.Text([
("offset", offset),
" ",
("text", hexa),
" ",
("text", s),
]))
trailer(len(content), txt, limit)
return "Hex", txt
def view_xml(hdrs, content, limit):
parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False)
try:
document = lxml.etree.fromstring(content, parser)
except lxml.etree.XMLSyntaxError:
return None
docinfo = document.getroottree().docinfo
prev = []
p = document.getroottree().getroot().getprevious()
while p is not None:
prev.insert(
0,
lxml.etree.tostring(p)
)
p = p.getprevious()
doctype=docinfo.doctype
if prev:
doctype += "\n".join(prev).strip()
doctype = doctype.strip()
s = lxml.etree.tostring(
document,
pretty_print=True,
xml_declaration=True,
doctype=doctype or None,
encoding = docinfo.encoding
)
txt = []
for i in s[:limit].strip().split("\n"):
txt.append(
urwid.Text(("text", i)),
)
trailer(len(content), txt, limit)
return "XML-like data", txt
def view_html(hdrs, content, limit):
if utils.isXML(content):
parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
d = lxml.html.fromstring(content, parser=parser)
docinfo = d.getroottree().docinfo
s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype)
return "HTML", _view_text(s[:limit], len(s), limit)
def view_outline(hdrs, content, limit):
content = content.decode("utf-8")
h = html2text.HTML2Text(baseurl="")
h.ignore_images = True
h.body_width = 0
content = h.handle(content)
txt = _view_text(content[:limit], len(content), limit)
return "HTML Outline", txt
def view_json(hdrs, content, limit):
lines = utils.pretty_json(content)
if lines:
txt = []
sofar = 0
for i in lines:
sofar += len(i)
txt.append(
urwid.Text(("text", i)),
)
if sofar > limit:
break
trailer(sum(len(i) for i in lines), txt, limit)
return "JSON", txt
def view_multipart(hdrs, content, limit):
v = hdrs.get("content-type")
if v:
v = utils.parse_content_type(v[0])
if not v:
return
boundary = v[2].get("boundary")
if not boundary:
return
rx = re.compile(r'\bname="([^"]+)"')
keys = []
vals = []
for i in content.split("--" + boundary):
parts = i.splitlines()
if len(parts) > 1 and parts[0][0:2] != "--":
match = rx.search(parts[1])
if match:
keys.append(match.group(1) + ":")
vals.append(utils.cleanBin(
"\n".join(parts[3+parts[2:].index(""):])
))
r = [
urwid.Text(("highlight", "Form data:\n")),
]
r.extend(common.format_keyvals(
zip(keys, vals),
key = "header",
val = "text"
))
return "Multipart form", r
def view_urlencoded(hdrs, content, limit):
lines = utils.urldecode(content)
if lines:
body = common.format_keyvals(
[(k+":", v) for (k, v) in lines],
key = "header",
val = "text"
)
return "URLEncoded form", body
def view_javascript(hdrs, content, limit):
opts = jsbeautifier.default_options()
opts.indent_size = 2
res = jsbeautifier.beautify(content[:limit], opts)
return "JavaScript", _view_text(res, len(content), limit)
def view_image(hdrs, content, limit):
try:
img = Image.open(cStringIO.StringIO(content))
except IOError:
return None
parts = [
("Format", str(img.format_description)),
("Size", "%s x %s px"%img.size),
("Mode", str(img.mode)),
]
for i in sorted(img.info.keys()):
if i != "exif":
parts.append(
(str(i), str(img.info[i]))
)
if hasattr(img, "_getexif"):
ex = img._getexif()
if ex:
for i in sorted(ex.keys()):
tag = TAGS.get(i, i)
parts.append(
(str(tag), str(ex[i]))
)
clean = []
for i in parts:
clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])])
fmt = common.format_keyvals(
clean,
key = "header",
val = "text"
)
return "%s image"%img.format, fmt
def view_amf(hdrs, content, limit):
s = utils.pretty_amf(content)
if s:
return "AMF", _view_text(s[:limit], len(s), limit)
PRETTY_FUNCTION_MAP = {
VIEW_XML: view_xml,
VIEW_HTML: view_html,
VIEW_JSON: view_json,
VIEW_URLENCODED: view_urlencoded,
VIEW_MULTIPART: view_multipart,
VIEW_JAVASCRIPT: view_javascript,
VIEW_IMAGE: view_image,
VIEW_HEX: view_hex,
VIEW_RAW: view_raw,
VIEW_OUTLINE: view_outline,
}
def get_view_func(viewmode, hdrs, content):
"""
Returns a function object.
"""
if viewmode == VIEW_AUTO:
ctype = hdrs.get("content-type")
if ctype:
ctype = ctype[0]
ct = utils.parse_content_type(ctype) if ctype else None
if ct:
viewmode = CONTENT_TYPES_MAP.get("%s/%s"%(ct[0], ct[1]))
if not viewmode and utils.isXML(content):
viewmode = VIEW_XML
return PRETTY_FUNCTION_MAP.get(viewmode, view_raw)
def get_content_view(viewmode, hdrItems, content, limit):
"""
Returns a (msg, body) tuple.
"""
msg = []
hdrs = flow.ODictCaseless([list(i) for i in hdrItems])
enc = hdrs.get("content-encoding")
if enc and enc[0] != "identity":
decoded = encoding.decode(enc[0], content)
if decoded:
content = decoded
msg.append("[decoded %s]"%enc[0])
func = get_view_func(viewmode, hdrs, content)
try:
ret = func(hdrs, content, limit)
# Third-party viewers can fail in unexpected ways...
except Exception, e:
s = traceback.format_exc()
return "", _view_text(s, len(s), len(s))
ret = None
if not ret:
viewmode = VIEW_RAW
ret = view_raw(hdrs, content, limit)
msg.append("Couldn't parse: falling back to Raw")
else:
msg.append(ret[0])
return " ".join(msg), ret[1]
#
# Enable optional decoding methods at runtime
#
# AMF decoding requires pyamf
try:
import pyamf
VIEW_SHORTCUTS["f"] = VIEW_AMF
VIEW_PROMPT.append(("amf", "f"))
VIEW_NAMES[VIEW_AMF] = "AMF"
CONTENT_TYPES_MAP["application/x-amf"] = VIEW_AMF
PRETTY_FUNCTION_MAP[VIEW_AMF] = view_amf
except ImportError:
pass