diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py index b75fa5d8b..3bc0c0912 100644 --- a/libmproxy/console/__init__.py +++ b/libmproxy/console/__init__.py @@ -14,7 +14,7 @@ import traceback import urwid import weakref -from .. import controller, flow, script, contentview +from .. import controller, flow, script, contentviews from . import flowlist, flowview, help, window, signals, options from . import grideditor, palettes, statusbar, palettepicker @@ -26,7 +26,7 @@ class ConsoleState(flow.State): flow.State.__init__(self) self.focus = None self.follow_focus = None - self.default_body_view = contentview.get("Auto") + self.default_body_view = contentviews.get("Auto") self.flowsettings = weakref.WeakKeyDictionary() self.last_search = None @@ -648,7 +648,7 @@ class ConsoleMaster(flow.FlowMaster): return self.state.set_intercept(txt) def change_default_display_mode(self, t): - v = contentview.get_by_shortcut(t) + v = contentviews.get_by_shortcut(t) self.state.default_body_view = v self.refresh_focus() diff --git a/libmproxy/console/flowview.py b/libmproxy/console/flowview.py index 192b1e5ba..3e13fab4c 100644 --- a/libmproxy/console/flowview.py +++ b/libmproxy/console/flowview.py @@ -1,15 +1,15 @@ from __future__ import absolute_import import os -import sys import traceback +import sys + import urwid from netlib import odict from netlib.http.semantics import CONTENT_MISSING, Headers - from . import common, grideditor, signals, searchable, tabs from . import flowdetailview -from .. import utils, controller, contentview +from .. 
import utils, controller, contentviews from ..models import HTTPRequest, HTTPResponse, decoded from ..exceptions import ContentViewException @@ -167,10 +167,10 @@ class FlowView(tabs.Tabs): if flow == self.flow: self.show() - def content_view(self, viewmode, conn): - if conn.content == CONTENT_MISSING: + def content_view(self, viewmode, message): + if message.body == CONTENT_MISSING: msg, body = "", [urwid.Text([("error", "[content missing]")])] - return (msg, body) + return msg, body else: full = self.state.get_flow_setting( self.flow, @@ -180,29 +180,43 @@ class FlowView(tabs.Tabs): if full: limit = sys.maxsize else: - limit = contentview.VIEW_CUTOFF + limit = contentviews.VIEW_CUTOFF return cache.get( self._get_content_view, viewmode, - conn.headers, - conn.content, - limit, - isinstance(conn, HTTPRequest) + message, + limit ) - def _get_content_view(self, viewmode, headers, content, limit, is_request): + def _get_content_view(self, viewmode, message, max_lines): + try: - description, lines = contentview.get_content_view( - viewmode, headers, content, limit, is_request + description, lines = contentviews.get_content_view( + viewmode, message.body, headers=message.headers ) except ContentViewException: s = "Content viewer failed: \n" + traceback.format_exc() signals.add_event(s, "error") - description, lines = contentview.get_content_view( - contentview.get("Raw"), headers, content, limit, is_request + description, lines = contentviews.get_content_view( + contentviews.get("Raw"), message.body, headers=message.headers ) description = description.replace("Raw", "Couldn't parse: falling back to Raw") - text_objects = [urwid.Text(l) for l in lines] + + # Give hint that you have to tab for the response. 
+ if description == "No content" and isinstance(message, HTTPRequest): + description = "No request content (press tab to view response)" + + text_objects = [] + for line in lines: + text_objects.append(urwid.Text(line)) + if len(text_objects) == max_lines: + text_objects.append(urwid.Text([ + ("highlight", "Stopped displaying data after %d lines. Press " % max_lines), + ("key", "f"), + ("highlight", " to load all data.") + ])) + break + return description, text_objects def viewmode_get(self): @@ -227,9 +241,7 @@ class FlowView(tabs.Tabs): [ ("heading", msg), ] - ) - ] - cols.append( + ), urwid.Text( [ " ", @@ -239,7 +251,7 @@ class FlowView(tabs.Tabs): ], align="right" ) - ) + ] title = urwid.AttrWrap(urwid.Columns(cols), "heading") txt.append(title) @@ -471,7 +483,7 @@ class FlowView(tabs.Tabs): self.state.add_flow_setting( self.flow, (self.tab_offset, "prettyview"), - contentview.get_by_shortcut(t) + contentviews.get_by_shortcut(t) ) signals.flow_change.send(self, flow = self.flow) @@ -611,7 +623,7 @@ class FlowView(tabs.Tabs): scope = "s" common.ask_copy_part(scope, self.flow, self.master, self.state) elif key == "m": - p = list(contentview.view_prompts) + p = list(contentviews.view_prompts) p.insert(0, ("Clear", "C")) signals.status_prompt_onekey.send( self, diff --git a/libmproxy/console/options.py b/libmproxy/console/options.py index 0948e96da..a365a78c9 100644 --- a/libmproxy/console/options.py +++ b/libmproxy/console/options.py @@ -1,6 +1,6 @@ import urwid -from .. import contentview +from .. import contentviews from . import common, signals, grideditor from . 
import select, palettes @@ -158,7 +158,7 @@ class Options(urwid.WidgetWrap): self.master.scripts = [] self.master.set_stickyauth(None) self.master.set_stickycookie(None) - self.master.state.default_body_view = contentview.get("Auto") + self.master.state.default_body_view = contentviews.get("Auto") signals.update_settings.send(self) signals.status_message.send( @@ -233,7 +233,7 @@ class Options(urwid.WidgetWrap): def default_displaymode(self): signals.status_prompt_onekey.send( prompt = "Global default display mode", - keys = contentview.view_prompts, + keys = contentviews.view_prompts, callback = self.master.change_default_display_mode ) diff --git a/libmproxy/contentview.py b/libmproxy/contentviews.py similarity index 71% rename from libmproxy/contentview.py rename to libmproxy/contentviews.py index 219adfb7c..a356b35de 100644 --- a/libmproxy/contentview.py +++ b/libmproxy/contentviews.py @@ -1,4 +1,17 @@ -from __future__ import absolute_import +""" +Mitmproxy Content Views +======================= + +mitmproxy includes a set of content views which can be used to format/decode/highlight data. +While they are currently used for HTTP message bodies only, they may be used in other contexts +in the future, e.g. to decode protobuf messages sent as WebSocket frames. + +Thus, the View API is very minimalistic. The only arguments are `data` and `**metadata`, +where `data` is the actual content (as bytes). The contents of metadata depend on the protocol in +use. For HTTP, the message headers are passed as the ``headers`` keyword argument. + +""" +from __future__ import (absolute_import, print_function, division) import cStringIO import json import logging @@ -8,7 +21,6 @@ import sys import lxml.html import lxml.etree from PIL import Image - from PIL.ExifTags import TAGS import html2text import six @@ -16,6 +28,7 @@ import six from netlib.odict import ODict from netlib import encoding import netlib.utils + from . 
import utils from .exceptions import ContentViewException from .contrib import jsbeautifier @@ -39,13 +52,15 @@ else: cssutils.ser.prefs.indentClosingBrace = False cssutils.ser.prefs.validOnly = False -VIEW_CUTOFF = 1024 * 50 +# Default view cutoff *in lines* +VIEW_CUTOFF = 512 + KEY_MAX = 30 def format_dict(d): """ - Transforms the given dictionary into a list of + Helper function that transforms the given dictionary into a list of ("key", key ) ("value", value) tuples, where key is padded to a uniform width. @@ -61,39 +76,38 @@ def format_dict(d): ] -def format_text(content, limit): +def format_text(text): """ - Transforms the given content into + Helper function that transforms bytes into the view output format. """ - content = netlib.utils.cleanBin(content) - - for line in content[:limit].splitlines(): + for line in text.splitlines(): yield [("text", line)] - for msg in trailer(content, limit): - yield msg - - -def trailer(content, limit): - bytes_removed = len(content) - limit - if bytes_removed > 0: - yield [ - ("cutoff", "... {} of data not shown.".format(netlib.utils.pretty_size(bytes_removed))) - ] - class View(object): name = None prompt = () content_types = [] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): """ + Transform raw data into human-readable output. + + Args: + data: the data to decode/format as bytes. + metadata: optional keyword-only arguments for metadata. Implementations must not + rely on a given argument being present. + Returns: A (description, content generator) tuple. - The content generator yields lists of (style, text) tuples. - Iit must not yield tuples of tuples, because urwid cannot process that. + The content generator yields lists of (style, text) tuples, where each list represents + a single line. ``text`` is an unfiltered byte string which may need to be escaped, + depending on the used output. 
+ + Caveats: + The content generator must not yield tuples of tuples, + because urwid cannot process that. You have to yield a *list* of tuples per line. """ raise NotImplementedError() @@ -103,16 +117,17 @@ class ViewAuto(View): prompt = ("auto", "a") content_types = [] - def __call__(self, hdrs, content, limit): - ctype = hdrs.get("content-type") + def __call__(self, data, **metadata): + headers = metadata.get("headers", {}) + ctype = headers.get("content-type") if ctype: ct = netlib.utils.parse_content_type(ctype) if ctype else None ct = "%s/%s" % (ct[0], ct[1]) if ct in content_types_map: - return content_types_map[ct][0](hdrs, content, limit) - elif utils.isXML(content): - return get("XML")(hdrs, content, limit) - return get("Raw")(hdrs, content, limit) + return content_types_map[ct][0](data, **metadata) + elif utils.isXML(data): + return get("XML")(data, **metadata) + return get("Raw")(data) class ViewRaw(View): @@ -120,8 +135,8 @@ class ViewRaw(View): prompt = ("raw", "r") content_types = [] - def __call__(self, hdrs, content, limit): - return "Raw", format_text(content, limit) + def __call__(self, data, **metadata): + return "Raw", format_text(data) class ViewHex(View): @@ -130,18 +145,16 @@ class ViewHex(View): content_types = [] @staticmethod - def _format(content, limit): - for offset, hexa, s in netlib.utils.hexdump(content[:limit]): + def _format(data): + for offset, hexa, s in netlib.utils.hexdump(data): yield [ ("offset", offset + " "), ("text", hexa + " "), ("text", s) ] - for msg in trailer(content, limit): - yield msg - def __call__(self, hdrs, content, limit): - return "Hex", self._format(content, limit) + def __call__(self, data, **metadata): + return "Hex", self._format(data) class ViewXML(View): @@ -149,7 +162,7 @@ class ViewXML(View): prompt = ("xml", "x") content_types = ["text/xml"] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): parser = lxml.etree.XMLParser( remove_blank_text=True, 
resolve_entities=False, @@ -157,7 +170,7 @@ class ViewXML(View): recover=False ) try: - document = lxml.etree.fromstring(content, parser) + document = lxml.etree.fromstring(data, parser) except lxml.etree.XMLSyntaxError: return None docinfo = document.getroottree().docinfo @@ -183,7 +196,7 @@ class ViewXML(View): encoding=docinfo.encoding ) - return "XML-like data", format_text(s, limit) + return "XML-like data", format_text(s) class ViewJSON(View): @@ -191,10 +204,10 @@ class ViewJSON(View): prompt = ("json", "s") content_types = ["application/json"] - def __call__(self, hdrs, content, limit): - pretty_json = utils.pretty_json(content) + def __call__(self, data, **metadata): + pretty_json = utils.pretty_json(data) if pretty_json: - return "JSON", format_text(pretty_json, limit) + return "JSON", format_text(pretty_json) class ViewHTML(View): @@ -202,20 +215,20 @@ class ViewHTML(View): prompt = ("html", "h") content_types = ["text/html"] - def __call__(self, hdrs, content, limit): - if utils.isXML(content): + def __call__(self, data, **metadata): + if utils.isXML(data): parser = lxml.etree.HTMLParser( strip_cdata=True, remove_blank_text=True ) - d = lxml.html.fromstring(content, parser=parser) + d = lxml.html.fromstring(data, parser=parser) docinfo = d.getroottree().docinfo s = lxml.etree.tostring( d, pretty_print=True, doctype=docinfo.doctype ) - return "HTML", format_text(s, limit) + return "HTML", format_text(s) class ViewHTMLOutline(View): @@ -223,13 +236,13 @@ class ViewHTMLOutline(View): prompt = ("html outline", "o") content_types = ["text/html"] - def __call__(self, hdrs, content, limit): - content = content.decode("utf-8") + def __call__(self, data, **metadata): + data = data.decode("utf-8") h = html2text.HTML2Text(baseurl="") h.ignore_images = True h.body_width = 0 - content = h.handle(content) - return "HTML Outline", format_text(content, limit) + outline = h.handle(data) + return "HTML Outline", format_text(outline) class ViewURLEncoded(View): @@ -237,8 
+250,8 @@ class ViewURLEncoded(View): prompt = ("urlencoded", "u") content_types = ["application/x-www-form-urlencoded"] - def __call__(self, hdrs, content, limit): - d = netlib.utils.urldecode(content) + def __call__(self, data, **metadata): + d = netlib.utils.urldecode(data) return "URLEncoded form", format_dict(ODict(d)) @@ -253,8 +266,9 @@ class ViewMultipart(View): for message in format_dict(ODict(v)): yield message - def __call__(self, hdrs, content, limit): - v = netlib.utils.multipartdecode(hdrs, content) + def __call__(self, data, **metadata): + headers = metadata.get("headers", {}) + v = netlib.utils.multipartdecode(headers, data) if v: return "Multipart form", self._format(v) @@ -308,7 +322,7 @@ if pyamf: else: return b - def _format(self, envelope, limit): + def _format(self, envelope): for target, message in iter(envelope): if isinstance(message, pyamf.remoting.Request): yield [ @@ -322,13 +336,13 @@ if pyamf: ] s = json.dumps(self.unpack(message), indent=4) - for msg in format_text(s, limit): + for msg in format_text(s): yield msg - def __call__(self, hdrs, content, limit): - envelope = remoting.decode(content, strict=False) + def __call__(self, data, **metadata): + envelope = remoting.decode(data, strict=False) if envelope: - return "AMF v%s" % envelope.amfVersion, self._format(envelope, limit) + return "AMF v%s" % envelope.amfVersion, self._format(envelope) class ViewJavaScript(View): @@ -340,12 +354,11 @@ class ViewJavaScript(View): "text/javascript" ] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): opts = jsbeautifier.default_options() opts.indent_size = 2 - res = jsbeautifier.beautify(content[:limit], opts) - cutoff = max(0, len(content) - limit) - return "JavaScript", format_text(res, limit - cutoff) + res = jsbeautifier.beautify(data, opts) + return "JavaScript", format_text(res) class ViewCSS(View): @@ -355,14 +368,14 @@ class ViewCSS(View): "text/css" ] - def __call__(self, hdrs, content, limit): + def 
__call__(self, data, **metadata): if cssutils: - sheet = cssutils.parseString(content) + sheet = cssutils.parseString(data) beautified = sheet.cssText else: - beautified = content + beautified = data - return "CSS", format_text(beautified, limit) + return "CSS", format_text(beautified) class ViewImage(View): @@ -376,9 +389,9 @@ class ViewImage(View): "image/x-icon", ] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): try: - img = Image.open(cStringIO.StringIO(content)) + img = Image.open(cStringIO.StringIO(data)) except IOError: return None parts = [ @@ -399,12 +412,7 @@ class ViewImage(View): parts.append( (str(tag), str(ex[i])) ) - clean = [] - for i in parts: - clean.append( - [netlib.utils.cleanBin(i[0]), netlib.utils.cleanBin(i[1])] - ) - fmt = format_dict(ODict(clean)) + fmt = format_dict(ODict(parts)) return "%s image" % img.format, fmt @@ -445,9 +453,9 @@ class ViewProtobuf(View): else: return err - def __call__(self, hdrs, content, limit): - decoded = self.decode_protobuf(content) - return "Protobuf", format_text(decoded, limit) + def __call__(self, data, **metadata): + decoded = self.decode_protobuf(data) + return "Protobuf", format_text(decoded) class ViewWBXML(View): @@ -458,13 +466,13 @@ class ViewWBXML(View): "application/vnd.ms-sync.wbxml" ] - def __call__(self, hdrs, content, limit): + def __call__(self, data, **metadata): try: - parser = ASCommandResponse(content) + parser = ASCommandResponse(data) parsedContent = parser.xmlString if parsedContent: - return "WBXML", format_text(parsedContent, limit) + return "WBXML", format_text(parsedContent) except: return None @@ -511,29 +519,31 @@ def get(name): return i -def get_content_view(viewmode, headers, content, limit, is_request): +def get_content_view(viewmode, data, **metadata): """ + Args: + viewmode: the view to use. + data, **metadata: arguments passed to View instance. + Returns: A (description, content generator) tuple. 
Raises: ContentViewException, if the content view threw an error. """ - if not content: - if is_request: - return "No request content (press tab to view response)", [] - else: - return "No content", [] + if not data: + return "No content", [] msg = [] + headers = metadata.get("headers", {}) enc = headers.get("content-encoding") if enc and enc != "identity": - decoded = encoding.decode(enc, content) + decoded = encoding.decode(enc, data) if decoded: - content = decoded + data = decoded msg.append("[decoded %s]" % enc) try: - ret = viewmode(headers, content, limit) + ret = viewmode(data, **metadata) # Third-party viewers can fail in unexpected ways... except Exception as e: six.reraise( @@ -542,7 +552,7 @@ def get_content_view(viewmode, headers, content, limit, is_request): sys.exc_info()[2] ) if not ret: - ret = get("Raw")(headers, content, limit) + ret = get("Raw")(data, **metadata) msg.append("Couldn't parse: falling back to Raw") else: msg.append(ret[0]) diff --git a/libmproxy/dump.py b/libmproxy/dump.py index dd44dc698..d477e0329 100644 --- a/libmproxy/dump.py +++ b/libmproxy/dump.py @@ -4,11 +4,11 @@ import os import traceback import click - +import itertools from netlib.http.semantics import CONTENT_MISSING import netlib.utils -from . import flow, filt, contentview +from . 
import flow, filt, contentviews from .exceptions import ContentViewException from .models import HTTPRequest @@ -57,6 +57,10 @@ class Options(object): setattr(self, i, None) +_contentview_auto = contentviews.get("Auto") +_contentview_raw = contentviews.get("Raw") + + class DumpMaster(flow.FlowMaster): def __init__(self, server, options, outfile=sys.stdout): flow.FlowMaster.__init__(self, server, flow.State()) @@ -174,28 +178,24 @@ class DumpMaster(flow.FlowMaster): ) self.echo(headers, indent=4) if self.o.flow_detail >= 3: - if message.content == CONTENT_MISSING: + if message.body == CONTENT_MISSING: self.echo("(content missing)", indent=4) - elif message.content: + elif message.body: self.echo("") - cutoff = sys.maxsize if self.o.flow_detail >= 4 else contentview.VIEW_CUTOFF + try: - type, lines = contentview.get_content_view( - contentview.get("Auto"), - message.headers, - message.body, - cutoff, - isinstance(message, HTTPRequest) + type, lines = contentviews.get_content_view( + _contentview_auto, + message.body, + headers=message.headers ) except ContentViewException: s = "Content viewer failed: \n" + traceback.format_exc() self.add_event(s, "debug") - type, lines = contentview.get_content_view( - contentview.get("Raw"), - message.headers, - message.body, - cutoff, - isinstance(message, HTTPRequest) + type, lines = contentviews.get_content_view( + _contentview_raw, + message.body, + headers=message.headers ) styles = dict( @@ -210,10 +210,18 @@ class DumpMaster(flow.FlowMaster): for (style, text) in line: yield click.style(text, **styles.get(style, {})) + if self.o.flow_detail == 3: + lines_to_echo = itertools.islice(lines, contentviews.VIEW_CUTOFF) + else: + lines_to_echo = lines + content = "\r\n".join( - "".join(colorful(line)) for line in lines + "".join(colorful(line)) for line in lines_to_echo ) + self.echo(content) + if next(lines, None): + self.echo("(cut off)", indent=4, dim=True) if self.o.flow_detail >= 2: self.echo("") diff --git 
a/test/test_contentview.py b/test/test_contentview.py index ec1b49303..2089b3eac 100644 --- a/test/test_contentview.py +++ b/test/test_contentview.py @@ -6,7 +6,7 @@ import sys import netlib.utils from netlib import encoding -import libmproxy.contentview as cv +import libmproxy.contentviews as cv import tutils try: @@ -21,12 +21,6 @@ except: class TestContentView: - def test_trailer(self): - txt = "X"*10 - lines = cv.trailer(txt, 1000) - assert not list(lines) - lines = cv.trailer(txt, 5) - assert list(lines) def test_view_auto(self): v = cv.ViewAuto() diff --git a/test/test_dump.py b/test/test_dump.py index 0fc4cd4d2..88f1a6fdb 100644 --- a/test/test_dump.py +++ b/test/test_dump.py @@ -1,6 +1,5 @@ import os from cStringIO import StringIO -from libmproxy.contentview import ViewAuto from libmproxy.exceptions import ContentViewException from libmproxy.models import HTTPResponse @@ -51,7 +50,7 @@ def test_strfuncs(): m.echo_flow(flow) -@mock.patch("libmproxy.contentview.get_content_view") +@mock.patch("libmproxy.contentviews.get_content_view") def test_contentview(get_content_view): get_content_view.side_effect = ContentViewException(""), ("x", [])