simplify contentview api

This commit is contained in:
Maximilian Hils 2015-09-12 13:49:16 +02:00
parent 9c31669211
commit 049d253a83
7 changed files with 171 additions and 148 deletions

View File

@ -14,7 +14,7 @@ import traceback
import urwid
import weakref
from .. import controller, flow, script, contentview
from .. import controller, flow, script, contentviews
from . import flowlist, flowview, help, window, signals, options
from . import grideditor, palettes, statusbar, palettepicker
@ -26,7 +26,7 @@ class ConsoleState(flow.State):
flow.State.__init__(self)
self.focus = None
self.follow_focus = None
self.default_body_view = contentview.get("Auto")
self.default_body_view = contentviews.get("Auto")
self.flowsettings = weakref.WeakKeyDictionary()
self.last_search = None
@ -648,7 +648,7 @@ class ConsoleMaster(flow.FlowMaster):
return self.state.set_intercept(txt)
def change_default_display_mode(self, t):
v = contentview.get_by_shortcut(t)
v = contentviews.get_by_shortcut(t)
self.state.default_body_view = v
self.refresh_focus()

View File

@ -1,15 +1,15 @@
from __future__ import absolute_import
import os
import sys
import traceback
import sys
import urwid
from netlib import odict
from netlib.http.semantics import CONTENT_MISSING, Headers
from . import common, grideditor, signals, searchable, tabs
from . import flowdetailview
from .. import utils, controller, contentview
from .. import utils, controller, contentviews
from ..models import HTTPRequest, HTTPResponse, decoded
from ..exceptions import ContentViewException
@ -167,10 +167,10 @@ class FlowView(tabs.Tabs):
if flow == self.flow:
self.show()
def content_view(self, viewmode, conn):
if conn.content == CONTENT_MISSING:
def content_view(self, viewmode, message):
if message.body == CONTENT_MISSING:
msg, body = "", [urwid.Text([("error", "[content missing]")])]
return (msg, body)
return msg, body
else:
full = self.state.get_flow_setting(
self.flow,
@ -180,29 +180,43 @@ class FlowView(tabs.Tabs):
if full:
limit = sys.maxsize
else:
limit = contentview.VIEW_CUTOFF
limit = contentviews.VIEW_CUTOFF
return cache.get(
self._get_content_view,
viewmode,
conn.headers,
conn.content,
limit,
isinstance(conn, HTTPRequest)
message,
limit
)
def _get_content_view(self, viewmode, headers, content, limit, is_request):
def _get_content_view(self, viewmode, message, max_lines):
try:
description, lines = contentview.get_content_view(
viewmode, headers, content, limit, is_request
description, lines = contentviews.get_content_view(
viewmode, message.body, headers=message.headers
)
except ContentViewException:
s = "Content viewer failed: \n" + traceback.format_exc()
signals.add_event(s, "error")
description, lines = contentview.get_content_view(
contentview.get("Raw"), headers, content, limit, is_request
description, lines = contentviews.get_content_view(
contentviews.get("Raw"), message.body, headers=message.headers
)
description = description.replace("Raw", "Couldn't parse: falling back to Raw")
text_objects = [urwid.Text(l) for l in lines]
# Give hint that you have to tab for the response.
if description == "No content" and isinstance(message, HTTPRequest):
description = "No request content (press tab to view response)"
text_objects = []
for line in lines:
text_objects.append(urwid.Text(line))
if len(text_objects) == max_lines:
text_objects.append(urwid.Text([
("highlight", "Stopped displaying data after %d lines. Press " % max_lines),
("key", "f"),
("highlight", " to load all data.")
]))
break
return description, text_objects
def viewmode_get(self):
@ -227,9 +241,7 @@ class FlowView(tabs.Tabs):
[
("heading", msg),
]
)
]
cols.append(
),
urwid.Text(
[
" ",
@ -239,7 +251,7 @@ class FlowView(tabs.Tabs):
],
align="right"
)
)
]
title = urwid.AttrWrap(urwid.Columns(cols), "heading")
txt.append(title)
@ -471,7 +483,7 @@ class FlowView(tabs.Tabs):
self.state.add_flow_setting(
self.flow,
(self.tab_offset, "prettyview"),
contentview.get_by_shortcut(t)
contentviews.get_by_shortcut(t)
)
signals.flow_change.send(self, flow = self.flow)
@ -611,7 +623,7 @@ class FlowView(tabs.Tabs):
scope = "s"
common.ask_copy_part(scope, self.flow, self.master, self.state)
elif key == "m":
p = list(contentview.view_prompts)
p = list(contentviews.view_prompts)
p.insert(0, ("Clear", "C"))
signals.status_prompt_onekey.send(
self,

View File

@ -1,6 +1,6 @@
import urwid
from .. import contentview
from .. import contentviews
from . import common, signals, grideditor
from . import select, palettes
@ -158,7 +158,7 @@ class Options(urwid.WidgetWrap):
self.master.scripts = []
self.master.set_stickyauth(None)
self.master.set_stickycookie(None)
self.master.state.default_body_view = contentview.get("Auto")
self.master.state.default_body_view = contentviews.get("Auto")
signals.update_settings.send(self)
signals.status_message.send(
@ -233,7 +233,7 @@ class Options(urwid.WidgetWrap):
def default_displaymode(self):
signals.status_prompt_onekey.send(
prompt = "Global default display mode",
keys = contentview.view_prompts,
keys = contentviews.view_prompts,
callback = self.master.change_default_display_mode
)

View File

@ -1,4 +1,17 @@
from __future__ import absolute_import
"""
Mitmproxy Content Views
=======================
mitmproxy includes a set of content views which can be used to format/decode/highlight data.
While they are currently used for HTTP message bodies only, they may be used in other contexts
in the future, e.g. to decode protobuf messages sent as WebSocket frames.
Thus, the View API is very minimalistic. The only arguments are `data` and `**metadata`,
where `data` is the actual content (as bytes). The contents of metadata depend on the protocol in
use. For HTTP, the message headers are passed as the ``headers`` keyword argument.
"""
from __future__ import (absolute_import, print_function, division)
import cStringIO
import json
import logging
@ -8,7 +21,6 @@ import sys
import lxml.html
import lxml.etree
from PIL import Image
from PIL.ExifTags import TAGS
import html2text
import six
@ -16,6 +28,7 @@ import six
from netlib.odict import ODict
from netlib import encoding
import netlib.utils
from . import utils
from .exceptions import ContentViewException
from .contrib import jsbeautifier
@ -39,13 +52,15 @@ else:
cssutils.ser.prefs.indentClosingBrace = False
cssutils.ser.prefs.validOnly = False
VIEW_CUTOFF = 1024 * 50
# Default view cutoff *in lines*
VIEW_CUTOFF = 512
KEY_MAX = 30
def format_dict(d):
"""
Transforms the given dictionary into a list of
Helper function that transforms the given dictionary into a list of
("key", key )
("value", value)
tuples, where key is padded to a uniform width.
@ -61,39 +76,38 @@ def format_dict(d):
]
def format_text(content, limit):
def format_text(text):
"""
Transforms the given content into
Helper function that transforms bytes into the view output format.
"""
content = netlib.utils.cleanBin(content)
for line in content[:limit].splitlines():
for line in text.splitlines():
yield [("text", line)]
for msg in trailer(content, limit):
yield msg
def trailer(content, limit):
bytes_removed = len(content) - limit
if bytes_removed > 0:
yield [
("cutoff", "... {} of data not shown.".format(netlib.utils.pretty_size(bytes_removed)))
]
class View(object):
name = None
prompt = ()
content_types = []
def __call__(self, hdrs, content, limit):
def __call__(self, data, **metadata):
"""
Transform raw data into human-readable output.
Args:
data: the data to decode/format as bytes.
metadata: optional keyword-only arguments for metadata. Implementations must not
rely on a given argument being present.
Returns:
A (description, content generator) tuple.
The content generator yields lists of (style, text) tuples.
Iit must not yield tuples of tuples, because urwid cannot process that.
The content generator yields lists of (style, text) tuples, where each list represents
a single line. ``text`` is an unfiltered byte string which may need to be escaped,
depending on the used output.
Caveats:
The content generator must not yield tuples of tuples,
because urwid cannot process that. You have to yield a *list* of tuples per line.
"""
raise NotImplementedError()
@ -103,16 +117,17 @@ class ViewAuto(View):
prompt = ("auto", "a")
content_types = []
def __call__(self, hdrs, content, limit):
ctype = hdrs.get("content-type")
def __call__(self, data, **metadata):
headers = metadata.get("headers", {})
ctype = headers.get("content-type")
if ctype:
ct = netlib.utils.parse_content_type(ctype) if ctype else None
ct = "%s/%s" % (ct[0], ct[1])
if ct in content_types_map:
return content_types_map[ct][0](hdrs, content, limit)
elif utils.isXML(content):
return get("XML")(hdrs, content, limit)
return get("Raw")(hdrs, content, limit)
return content_types_map[ct][0](data, **metadata)
elif utils.isXML(data):
return get("XML")(data, **metadata)
return get("Raw")(data)
class ViewRaw(View):
@ -120,8 +135,8 @@ class ViewRaw(View):
prompt = ("raw", "r")
content_types = []
def __call__(self, hdrs, content, limit):
return "Raw", format_text(content, limit)
def __call__(self, data, **metadata):
return "Raw", format_text(data)
class ViewHex(View):
@ -130,18 +145,16 @@ class ViewHex(View):
content_types = []
@staticmethod
def _format(content, limit):
for offset, hexa, s in netlib.utils.hexdump(content[:limit]):
def _format(data):
for offset, hexa, s in netlib.utils.hexdump(data):
yield [
("offset", offset + " "),
("text", hexa + " "),
("text", s)
]
for msg in trailer(content, limit):
yield msg
def __call__(self, hdrs, content, limit):
return "Hex", self._format(content, limit)
def __call__(self, data, **metadata):
return "Hex", self._format(data)
class ViewXML(View):
@ -149,7 +162,7 @@ class ViewXML(View):
prompt = ("xml", "x")
content_types = ["text/xml"]
def __call__(self, hdrs, content, limit):
def __call__(self, data, **metadata):
parser = lxml.etree.XMLParser(
remove_blank_text=True,
resolve_entities=False,
@ -157,7 +170,7 @@ class ViewXML(View):
recover=False
)
try:
document = lxml.etree.fromstring(content, parser)
document = lxml.etree.fromstring(data, parser)
except lxml.etree.XMLSyntaxError:
return None
docinfo = document.getroottree().docinfo
@ -183,7 +196,7 @@ class ViewXML(View):
encoding=docinfo.encoding
)
return "XML-like data", format_text(s, limit)
return "XML-like data", format_text(s)
class ViewJSON(View):
@ -191,10 +204,10 @@ class ViewJSON(View):
prompt = ("json", "s")
content_types = ["application/json"]
def __call__(self, hdrs, content, limit):
pretty_json = utils.pretty_json(content)
def __call__(self, data, **metadata):
pretty_json = utils.pretty_json(data)
if pretty_json:
return "JSON", format_text(pretty_json, limit)
return "JSON", format_text(pretty_json)
class ViewHTML(View):
@ -202,20 +215,20 @@ class ViewHTML(View):
prompt = ("html", "h")
content_types = ["text/html"]
def __call__(self, hdrs, content, limit):
if utils.isXML(content):
def __call__(self, data, **metadata):
if utils.isXML(data):
parser = lxml.etree.HTMLParser(
strip_cdata=True,
remove_blank_text=True
)
d = lxml.html.fromstring(content, parser=parser)
d = lxml.html.fromstring(data, parser=parser)
docinfo = d.getroottree().docinfo
s = lxml.etree.tostring(
d,
pretty_print=True,
doctype=docinfo.doctype
)
return "HTML", format_text(s, limit)
return "HTML", format_text(s)
class ViewHTMLOutline(View):
@ -223,13 +236,13 @@ class ViewHTMLOutline(View):
prompt = ("html outline", "o")
content_types = ["text/html"]
def __call__(self, hdrs, content, limit):
content = content.decode("utf-8")
def __call__(self, data, **metadata):
data = data.decode("utf-8")
h = html2text.HTML2Text(baseurl="")
h.ignore_images = True
h.body_width = 0
content = h.handle(content)
return "HTML Outline", format_text(content, limit)
outline = h.handle(data)
return "HTML Outline", format_text(outline)
class ViewURLEncoded(View):
@ -237,8 +250,8 @@ class ViewURLEncoded(View):
prompt = ("urlencoded", "u")
content_types = ["application/x-www-form-urlencoded"]
def __call__(self, hdrs, content, limit):
d = netlib.utils.urldecode(content)
def __call__(self, data, **metadata):
d = netlib.utils.urldecode(data)
return "URLEncoded form", format_dict(ODict(d))
@ -253,8 +266,9 @@ class ViewMultipart(View):
for message in format_dict(ODict(v)):
yield message
def __call__(self, hdrs, content, limit):
v = netlib.utils.multipartdecode(hdrs, content)
def __call__(self, data, **metadata):
headers = metadata.get("headers", {})
v = netlib.utils.multipartdecode(headers, data)
if v:
return "Multipart form", self._format(v)
@ -308,7 +322,7 @@ if pyamf:
else:
return b
def _format(self, envelope, limit):
def _format(self, envelope):
for target, message in iter(envelope):
if isinstance(message, pyamf.remoting.Request):
yield [
@ -322,13 +336,13 @@ if pyamf:
]
s = json.dumps(self.unpack(message), indent=4)
for msg in format_text(s, limit):
for msg in format_text(s):
yield msg
def __call__(self, hdrs, content, limit):
envelope = remoting.decode(content, strict=False)
def __call__(self, data, **metadata):
envelope = remoting.decode(data, strict=False)
if envelope:
return "AMF v%s" % envelope.amfVersion, self._format(envelope, limit)
return "AMF v%s" % envelope.amfVersion, self._format(envelope)
class ViewJavaScript(View):
@ -340,12 +354,11 @@ class ViewJavaScript(View):
"text/javascript"
]
def __call__(self, hdrs, content, limit):
def __call__(self, data, **metadata):
opts = jsbeautifier.default_options()
opts.indent_size = 2
res = jsbeautifier.beautify(content[:limit], opts)
cutoff = max(0, len(content) - limit)
return "JavaScript", format_text(res, limit - cutoff)
res = jsbeautifier.beautify(data, opts)
return "JavaScript", format_text(res)
class ViewCSS(View):
@ -355,14 +368,14 @@ class ViewCSS(View):
"text/css"
]
def __call__(self, hdrs, content, limit):
def __call__(self, data, **metadata):
if cssutils:
sheet = cssutils.parseString(content)
sheet = cssutils.parseString(data)
beautified = sheet.cssText
else:
beautified = content
beautified = data
return "CSS", format_text(beautified, limit)
return "CSS", format_text(beautified)
class ViewImage(View):
@ -376,9 +389,9 @@ class ViewImage(View):
"image/x-icon",
]
def __call__(self, hdrs, content, limit):
def __call__(self, data, **metadata):
try:
img = Image.open(cStringIO.StringIO(content))
img = Image.open(cStringIO.StringIO(data))
except IOError:
return None
parts = [
@ -399,12 +412,7 @@ class ViewImage(View):
parts.append(
(str(tag), str(ex[i]))
)
clean = []
for i in parts:
clean.append(
[netlib.utils.cleanBin(i[0]), netlib.utils.cleanBin(i[1])]
)
fmt = format_dict(ODict(clean))
fmt = format_dict(ODict(parts))
return "%s image" % img.format, fmt
@ -445,9 +453,9 @@ class ViewProtobuf(View):
else:
return err
def __call__(self, hdrs, content, limit):
decoded = self.decode_protobuf(content)
return "Protobuf", format_text(decoded, limit)
def __call__(self, data, **metadata):
decoded = self.decode_protobuf(data)
return "Protobuf", format_text(decoded)
class ViewWBXML(View):
@ -458,13 +466,13 @@ class ViewWBXML(View):
"application/vnd.ms-sync.wbxml"
]
def __call__(self, hdrs, content, limit):
def __call__(self, data, **metadata):
try:
parser = ASCommandResponse(content)
parser = ASCommandResponse(data)
parsedContent = parser.xmlString
if parsedContent:
return "WBXML", format_text(parsedContent, limit)
return "WBXML", format_text(parsedContent)
except:
return None
@ -511,29 +519,31 @@ def get(name):
return i
def get_content_view(viewmode, headers, content, limit, is_request):
def get_content_view(viewmode, data, **metadata):
"""
Args:
viewmode: the view to use.
data, **metadata: arguments passed to View instance.
Returns:
A (description, content generator) tuple.
Raises:
ContentViewException, if the content view threw an error.
"""
if not content:
if is_request:
return "No request content (press tab to view response)", []
else:
return "No content", []
if not data:
return "No content", []
msg = []
headers = metadata.get("headers", {})
enc = headers.get("content-encoding")
if enc and enc != "identity":
decoded = encoding.decode(enc, content)
decoded = encoding.decode(enc, data)
if decoded:
content = decoded
data = decoded
msg.append("[decoded %s]" % enc)
try:
ret = viewmode(headers, content, limit)
ret = viewmode(data, **metadata)
# Third-party viewers can fail in unexpected ways...
except Exception as e:
six.reraise(
@ -542,7 +552,7 @@ def get_content_view(viewmode, headers, content, limit, is_request):
sys.exc_info()[2]
)
if not ret:
ret = get("Raw")(headers, content, limit)
ret = get("Raw")(data, **metadata)
msg.append("Couldn't parse: falling back to Raw")
else:
msg.append(ret[0])

View File

@ -4,11 +4,11 @@ import os
import traceback
import click
import itertools
from netlib.http.semantics import CONTENT_MISSING
import netlib.utils
from . import flow, filt, contentview
from . import flow, filt, contentviews
from .exceptions import ContentViewException
from .models import HTTPRequest
@ -57,6 +57,10 @@ class Options(object):
setattr(self, i, None)
_contentview_auto = contentviews.get("Auto")
_contentview_raw = contentviews.get("Raw")
class DumpMaster(flow.FlowMaster):
def __init__(self, server, options, outfile=sys.stdout):
flow.FlowMaster.__init__(self, server, flow.State())
@ -174,28 +178,24 @@ class DumpMaster(flow.FlowMaster):
)
self.echo(headers, indent=4)
if self.o.flow_detail >= 3:
if message.content == CONTENT_MISSING:
if message.body == CONTENT_MISSING:
self.echo("(content missing)", indent=4)
elif message.content:
elif message.body:
self.echo("")
cutoff = sys.maxsize if self.o.flow_detail >= 4 else contentview.VIEW_CUTOFF
try:
type, lines = contentview.get_content_view(
contentview.get("Auto"),
message.headers,
message.body,
cutoff,
isinstance(message, HTTPRequest)
type, lines = contentviews.get_content_view(
_contentview_auto,
message.body,
headers=message.headers
)
except ContentViewException:
s = "Content viewer failed: \n" + traceback.format_exc()
self.add_event(s, "debug")
type, lines = contentview.get_content_view(
contentview.get("Raw"),
message.headers,
message.body,
cutoff,
isinstance(message, HTTPRequest)
type, lines = contentviews.get_content_view(
_contentview_raw,
message.body,
headers=message.headers
)
styles = dict(
@ -210,10 +210,18 @@ class DumpMaster(flow.FlowMaster):
for (style, text) in line:
yield click.style(text, **styles.get(style, {}))
if self.o.flow_detail == 3:
lines_to_echo = itertools.islice(lines, contentviews.VIEW_CUTOFF)
else:
lines_to_echo = lines
content = "\r\n".join(
"".join(colorful(line)) for line in lines
"".join(colorful(line)) for line in lines_to_echo
)
self.echo(content)
if next(lines, None):
self.echo("(cut off)", indent=4, dim=True)
if self.o.flow_detail >= 2:
self.echo("")

View File

@ -6,7 +6,7 @@ import sys
import netlib.utils
from netlib import encoding
import libmproxy.contentview as cv
import libmproxy.contentviews as cv
import tutils
try:
@ -21,12 +21,6 @@ except:
class TestContentView:
def test_trailer(self):
txt = "X"*10
lines = cv.trailer(txt, 1000)
assert not list(lines)
lines = cv.trailer(txt, 5)
assert list(lines)
def test_view_auto(self):
v = cv.ViewAuto()

View File

@ -1,6 +1,5 @@
import os
from cStringIO import StringIO
from libmproxy.contentview import ViewAuto
from libmproxy.exceptions import ContentViewException
from libmproxy.models import HTTPResponse
@ -51,7 +50,7 @@ def test_strfuncs():
m.echo_flow(flow)
@mock.patch("libmproxy.contentview.get_content_view")
@mock.patch("libmproxy.contentviews.get_content_view")
def test_contentview(get_content_view):
get_content_view.side_effect = ContentViewException(""), ("x", [])