simplify contentview api

2025-02-01 07:49:10 +00:00 · 2015-09-12 13:49:16 +02:00 · 2015-09-12 13:49:16 +02:00 · 049d253a83
commit 049d253a83
parent 9c31669211
7 changed files with 171 additions and 148 deletions
--- a/libmproxy/console/init.py
+++ b/libmproxy/console/init.py
@ -14,7 +14,7 @@ import traceback
 import urwid
 import weakref

-from .. import controller, flow, script, contentview
+from .. import controller, flow, script, contentviews
 from . import flowlist, flowview, help, window, signals, options
 from . import grideditor, palettes, statusbar, palettepicker

@ -26,7 +26,7 @@ class ConsoleState(flow.State):
        flow.State.__init__(self)
        self.focus = None
        self.follow_focus = None
-        self.default_body_view = contentview.get("Auto")
+        self.default_body_view = contentviews.get("Auto")
        self.flowsettings = weakref.WeakKeyDictionary()
        self.last_search = None

@ -648,7 +648,7 @@ class ConsoleMaster(flow.FlowMaster):
        return self.state.set_intercept(txt)

    def change_default_display_mode(self, t):
-        v = contentview.get_by_shortcut(t)
+        v = contentviews.get_by_shortcut(t)
        self.state.default_body_view = v
        self.refresh_focus()

--- a/libmproxy/console/flowview.py
+++ b/libmproxy/console/flowview.py
@ -1,15 +1,15 @@
 from __future__ import absolute_import
 import os
-import sys
 import traceback
+import sys
+
 import urwid

 from netlib import odict
 from netlib.http.semantics import CONTENT_MISSING, Headers
-
 from . import common, grideditor, signals, searchable, tabs
 from . import flowdetailview
-from .. import utils, controller, contentview
+from .. import utils, controller, contentviews
 from ..models import HTTPRequest, HTTPResponse, decoded
 from ..exceptions import ContentViewException

@ -167,10 +167,10 @@ class FlowView(tabs.Tabs):
        if flow == self.flow:
            self.show()

-    def content_view(self, viewmode, conn):
-        if conn.content == CONTENT_MISSING:
+    def content_view(self, viewmode, message):
+        if message.body == CONTENT_MISSING:
            msg, body = "", [urwid.Text([("error", "[content missing]")])]
-            return (msg, body)
+            return msg, body
        else:
            full = self.state.get_flow_setting(
                self.flow,
@ -180,29 +180,43 @@ class FlowView(tabs.Tabs):
            if full:
                limit = sys.maxsize
            else:
-                limit = contentview.VIEW_CUTOFF
+                limit = contentviews.VIEW_CUTOFF
            return cache.get(
                self._get_content_view,
                viewmode,
-                conn.headers,
-                conn.content,
-                limit,
-                isinstance(conn, HTTPRequest)
+                message,
+                limit
            )

-    def _get_content_view(self, viewmode, headers, content, limit, is_request):
+    def _get_content_view(self, viewmode, message, max_lines):
+
        try:
-            description, lines = contentview.get_content_view(
-                viewmode, headers, content, limit, is_request
+            description, lines = contentviews.get_content_view(
+                viewmode, message.body, headers=message.headers
            )
        except ContentViewException:
            s = "Content viewer failed: \n" + traceback.format_exc()
            signals.add_event(s, "error")
-            description, lines = contentview.get_content_view(
-                contentview.get("Raw"), headers, content, limit, is_request
+            description, lines = contentviews.get_content_view(
+                contentviews.get("Raw"), message.body, headers=message.headers
            )
            description = description.replace("Raw", "Couldn't parse: falling back to Raw")
-        text_objects = [urwid.Text(l) for l in lines]
+
+        # Give hint that you have to tab for the response.
+        if description == "No content" and isinstance(message, HTTPRequest):
+            description = "No request content (press tab to view response)"
+
+        text_objects = []
+        for line in lines:
+            text_objects.append(urwid.Text(line))
+            if len(text_objects) == max_lines:
+                text_objects.append(urwid.Text([
+                    ("highlight", "Stopped displaying data after %d lines. Press " % max_lines),
+                    ("key", "f"),
+                    ("highlight", " to load all data.")
+                ]))
+                break
+
        return description, text_objects

    def viewmode_get(self):
@ -227,9 +241,7 @@ class FlowView(tabs.Tabs):
                    [
                        ("heading", msg),
                    ]
-                )
-            ]
-            cols.append(
+                ),
                urwid.Text(
                    [
                        " ",
@ -239,7 +251,7 @@ class FlowView(tabs.Tabs):
                    ],
                    align="right"
                )
-            )
+            ]
            title = urwid.AttrWrap(urwid.Columns(cols), "heading")

            txt.append(title)
@ -471,7 +483,7 @@ class FlowView(tabs.Tabs):
        self.state.add_flow_setting(
            self.flow,
            (self.tab_offset, "prettyview"),
-            contentview.get_by_shortcut(t)
+            contentviews.get_by_shortcut(t)
        )
        signals.flow_change.send(self, flow = self.flow)

@ -611,7 +623,7 @@ class FlowView(tabs.Tabs):
                    scope = "s"
                common.ask_copy_part(scope, self.flow, self.master, self.state)
            elif key == "m":
-                p = list(contentview.view_prompts)
+                p = list(contentviews.view_prompts)
                p.insert(0, ("Clear", "C"))
                signals.status_prompt_onekey.send(
                    self,
--- a/libmproxy/console/options.py
+++ b/libmproxy/console/options.py
@ -1,6 +1,6 @@
 import urwid

-from .. import contentview
+from .. import contentviews
 from . import common, signals, grideditor
 from . import select, palettes

@ -158,7 +158,7 @@ class Options(urwid.WidgetWrap):
        self.master.scripts = []
        self.master.set_stickyauth(None)
        self.master.set_stickycookie(None)
-        self.master.state.default_body_view = contentview.get("Auto")
+        self.master.state.default_body_view = contentviews.get("Auto")

        signals.update_settings.send(self)
        signals.status_message.send(
@ -233,7 +233,7 @@ class Options(urwid.WidgetWrap):
    def default_displaymode(self):
        signals.status_prompt_onekey.send(
            prompt = "Global default display mode",
-            keys = contentview.view_prompts,
+            keys = contentviews.view_prompts,
            callback = self.master.change_default_display_mode
        )

--- a/libmproxy/contentviews.py
+++ b/libmproxy/contentviews.py
@ -1,4 +1,17 @@
-from __future__ import absolute_import
+"""
+Mitmproxy Content Views
+=======================
+
+mitmproxy includes a set of content views which can be used to format/decode/highlight data.
+While they are currently used for HTTP message bodies only, they may be used in other contexts
+in the future, e.g. to decode protobuf messages sent as WebSocket frames.
+
+Thus, the View API is very minimalistic. The only arguments are `data` and `**metadata`,
+where `data` is the actual content (as bytes). The contents of metadata depend on the protocol in
+use. For HTTP, the message headers are passed as the ``headers`` keyword argument.
+
+"""
+from __future__ import (absolute_import, print_function, division)
 import cStringIO
 import json
 import logging
@ -8,7 +21,6 @@ import sys
 import lxml.html
 import lxml.etree
 from PIL import Image
-
 from PIL.ExifTags import TAGS
 import html2text
 import six
@ -16,6 +28,7 @@ import six
 from netlib.odict import ODict
 from netlib import encoding
 import netlib.utils
+
 from . import utils
 from .exceptions import ContentViewException
 from .contrib import jsbeautifier
@ -39,13 +52,15 @@ else:
    cssutils.ser.prefs.indentClosingBrace = False
    cssutils.ser.prefs.validOnly = False

-VIEW_CUTOFF = 1024 * 50
+# Default view cutoff *in lines*
+VIEW_CUTOFF = 512
+
 KEY_MAX = 30


 def format_dict(d):
    """
-    Transforms the given dictionary into a list of
+    Helper function that transforms the given dictionary into a list of
        ("key",   key  )
        ("value", value)
    tuples, where key is padded to a uniform width.
@ -61,39 +76,38 @@ def format_dict(d):
        ]


-def format_text(content, limit):
+def format_text(text):
    """
-    Transforms the given content into
+    Helper function that transforms bytes into the view output format.
    """
-    content = netlib.utils.cleanBin(content)
-
-    for line in content[:limit].splitlines():
+    for line in text.splitlines():
        yield [("text", line)]

-    for msg in trailer(content, limit):
-        yield msg
-
-
-def trailer(content, limit):
-    bytes_removed = len(content) - limit
-    if bytes_removed > 0:
-        yield [
-            ("cutoff", "... {} of data not shown.".format(netlib.utils.pretty_size(bytes_removed)))
-        ]
-

 class View(object):
    name = None
    prompt = ()
    content_types = []

-    def __call__(self, hdrs, content, limit):
+    def __call__(self, data, **metadata):
        """
+        Transform raw data into human-readable output.
+
+        Args:
+            data: the data to decode/format as bytes.
+            metadata: optional keyword-only arguments for metadata. Implementations must not
+                rely on a given argument being present.
+
        Returns:
            A (description, content generator) tuple.

-            The content generator yields lists of (style, text) tuples.
-            Iit must not yield tuples of tuples, because urwid cannot process that.
+            The content generator yields lists of (style, text) tuples, where each list represents
+            a single line. ``text`` is an unfiltered byte string which may need to be escaped,
+            depending on the used output.
+
+        Caveats:
+            The content generator must not yield tuples of tuples,
+            because urwid cannot process that. You have to yield a *list* of tuples per line.
        """
        raise NotImplementedError()

@ -103,16 +117,17 @@ class ViewAuto(View):
    prompt = ("auto", "a")
    content_types = []

-    def __call__(self, hdrs, content, limit):
-        ctype = hdrs.get("content-type")
+    def __call__(self, data, **metadata):
+        headers = metadata.get("headers", {})
+        ctype = headers.get("content-type")
        if ctype:
            ct = netlib.utils.parse_content_type(ctype) if ctype else None
            ct = "%s/%s" % (ct[0], ct[1])
            if ct in content_types_map:
-                return content_types_map[ct][0](hdrs, content, limit)
-            elif utils.isXML(content):
-                return get("XML")(hdrs, content, limit)
-        return get("Raw")(hdrs, content, limit)
+                return content_types_map[ct][0](data, **metadata)
+            elif utils.isXML(data):
+                return get("XML")(data, **metadata)
+        return get("Raw")(data)


 class ViewRaw(View):
@ -120,8 +135,8 @@ class ViewRaw(View):
    prompt = ("raw", "r")
    content_types = []

-    def __call__(self, hdrs, content, limit):
-        return "Raw", format_text(content, limit)
+    def __call__(self, data, **metadata):
+        return "Raw", format_text(data)


 class ViewHex(View):
@ -130,18 +145,16 @@ class ViewHex(View):
    content_types = []

    @staticmethod
-    def _format(content, limit):
-        for offset, hexa, s in netlib.utils.hexdump(content[:limit]):
+    def _format(data):
+        for offset, hexa, s in netlib.utils.hexdump(data):
            yield [
                ("offset", offset + " "),
                ("text", hexa + "   "),
                ("text", s)
            ]
-        for msg in trailer(content, limit):
-            yield msg

-    def __call__(self, hdrs, content, limit):
-        return "Hex", self._format(content, limit)
+    def __call__(self, data, **metadata):
+        return "Hex", self._format(data)


 class ViewXML(View):
@ -149,7 +162,7 @@ class ViewXML(View):
    prompt = ("xml", "x")
    content_types = ["text/xml"]

-    def __call__(self, hdrs, content, limit):
+    def __call__(self, data, **metadata):
        parser = lxml.etree.XMLParser(
            remove_blank_text=True,
            resolve_entities=False,
@ -157,7 +170,7 @@ class ViewXML(View):
            recover=False
        )
        try:
-            document = lxml.etree.fromstring(content, parser)
+            document = lxml.etree.fromstring(data, parser)
        except lxml.etree.XMLSyntaxError:
            return None
        docinfo = document.getroottree().docinfo
@ -183,7 +196,7 @@ class ViewXML(View):
            encoding=docinfo.encoding
        )

-        return "XML-like data", format_text(s, limit)
+        return "XML-like data", format_text(s)


 class ViewJSON(View):
@ -191,10 +204,10 @@ class ViewJSON(View):
    prompt = ("json", "s")
    content_types = ["application/json"]

-    def __call__(self, hdrs, content, limit):
-        pretty_json = utils.pretty_json(content)
+    def __call__(self, data, **metadata):
+        pretty_json = utils.pretty_json(data)
        if pretty_json:
-            return "JSON", format_text(pretty_json, limit)
+            return "JSON", format_text(pretty_json)


 class ViewHTML(View):
@ -202,20 +215,20 @@ class ViewHTML(View):
    prompt = ("html", "h")
    content_types = ["text/html"]

-    def __call__(self, hdrs, content, limit):
-        if utils.isXML(content):
+    def __call__(self, data, **metadata):
+        if utils.isXML(data):
            parser = lxml.etree.HTMLParser(
                strip_cdata=True,
                remove_blank_text=True
            )
-            d = lxml.html.fromstring(content, parser=parser)
+            d = lxml.html.fromstring(data, parser=parser)
            docinfo = d.getroottree().docinfo
            s = lxml.etree.tostring(
                d,
                pretty_print=True,
                doctype=docinfo.doctype
            )
-            return "HTML", format_text(s, limit)
+            return "HTML", format_text(s)


 class ViewHTMLOutline(View):
@ -223,13 +236,13 @@ class ViewHTMLOutline(View):
    prompt = ("html outline", "o")
    content_types = ["text/html"]

-    def __call__(self, hdrs, content, limit):
-        content = content.decode("utf-8")
+    def __call__(self, data, **metadata):
+        data = data.decode("utf-8")
        h = html2text.HTML2Text(baseurl="")
        h.ignore_images = True
        h.body_width = 0
-        content = h.handle(content)
-        return "HTML Outline", format_text(content, limit)
+        outline = h.handle(data)
+        return "HTML Outline", format_text(outline)


 class ViewURLEncoded(View):
@ -237,8 +250,8 @@ class ViewURLEncoded(View):
    prompt = ("urlencoded", "u")
    content_types = ["application/x-www-form-urlencoded"]

-    def __call__(self, hdrs, content, limit):
-        d = netlib.utils.urldecode(content)
+    def __call__(self, data, **metadata):
+        d = netlib.utils.urldecode(data)
        return "URLEncoded form", format_dict(ODict(d))


@ -253,8 +266,9 @@ class ViewMultipart(View):
        for message in format_dict(ODict(v)):
            yield message

-    def __call__(self, hdrs, content, limit):
-        v = netlib.utils.multipartdecode(hdrs, content)
+    def __call__(self, data, **metadata):
+        headers = metadata.get("headers", {})
+        v = netlib.utils.multipartdecode(headers, data)
        if v:
            return "Multipart form", self._format(v)

@ -308,7 +322,7 @@ if pyamf:
            else:
                return b

-        def _format(self, envelope, limit):
+        def _format(self, envelope):
            for target, message in iter(envelope):
                if isinstance(message, pyamf.remoting.Request):
                    yield [
@ -322,13 +336,13 @@ if pyamf:
                    ]

                s = json.dumps(self.unpack(message), indent=4)
-                for msg in format_text(s, limit):
+                for msg in format_text(s):
                    yield msg

-        def __call__(self, hdrs, content, limit):
-            envelope = remoting.decode(content, strict=False)
+        def __call__(self, data, **metadata):
+            envelope = remoting.decode(data, strict=False)
            if envelope:
-                return "AMF v%s" % envelope.amfVersion, self._format(envelope, limit)
+                return "AMF v%s" % envelope.amfVersion, self._format(envelope)


 class ViewJavaScript(View):
@ -340,12 +354,11 @@ class ViewJavaScript(View):
        "text/javascript"
    ]

-    def __call__(self, hdrs, content, limit):
+    def __call__(self, data, **metadata):
        opts = jsbeautifier.default_options()
        opts.indent_size = 2
-        res = jsbeautifier.beautify(content[:limit], opts)
-        cutoff = max(0, len(content) - limit)
-        return "JavaScript", format_text(res, limit - cutoff)
+        res = jsbeautifier.beautify(data, opts)
+        return "JavaScript", format_text(res)


 class ViewCSS(View):
@ -355,14 +368,14 @@ class ViewCSS(View):
        "text/css"
    ]

-    def __call__(self, hdrs, content, limit):
+    def __call__(self, data, **metadata):
        if cssutils:
-            sheet = cssutils.parseString(content)
+            sheet = cssutils.parseString(data)
            beautified = sheet.cssText
        else:
-            beautified = content
+            beautified = data

-        return "CSS", format_text(beautified, limit)
+        return "CSS", format_text(beautified)


 class ViewImage(View):
@ -376,9 +389,9 @@ class ViewImage(View):
        "image/x-icon",
    ]

-    def __call__(self, hdrs, content, limit):
+    def __call__(self, data, **metadata):
        try:
-            img = Image.open(cStringIO.StringIO(content))
+            img = Image.open(cStringIO.StringIO(data))
        except IOError:
            return None
        parts = [
@ -399,12 +412,7 @@ class ViewImage(View):
                    parts.append(
                        (str(tag), str(ex[i]))
                    )
-        clean = []
-        for i in parts:
-            clean.append(
-                [netlib.utils.cleanBin(i[0]), netlib.utils.cleanBin(i[1])]
-            )
-        fmt = format_dict(ODict(clean))
+        fmt = format_dict(ODict(parts))
        return "%s image" % img.format, fmt


@ -445,9 +453,9 @@ class ViewProtobuf(View):
        else:
            return err

-    def __call__(self, hdrs, content, limit):
-        decoded = self.decode_protobuf(content)
-        return "Protobuf", format_text(decoded, limit)
+    def __call__(self, data, **metadata):
+        decoded = self.decode_protobuf(data)
+        return "Protobuf", format_text(decoded)


 class ViewWBXML(View):
@ -458,13 +466,13 @@ class ViewWBXML(View):
        "application/vnd.ms-sync.wbxml"
    ]

-    def __call__(self, hdrs, content, limit):
+    def __call__(self, data, **metadata):

        try:
-            parser = ASCommandResponse(content)
+            parser = ASCommandResponse(data)
            parsedContent = parser.xmlString
            if parsedContent:
-                return "WBXML", format_text(parsedContent, limit)
+                return "WBXML", format_text(parsedContent)
        except:
            return None

@ -511,29 +519,31 @@ def get(name):
            return i


-def get_content_view(viewmode, headers, content, limit, is_request):
+def get_content_view(viewmode, data, **metadata):
    """
+        Args:
+            viewmode: the view to use.
+            data, **metadata: arguments passed to View instance.
+
        Returns:
            A (description, content generator) tuple.

        Raises:
            ContentViewException, if the content view threw an error.
    """
-    if not content:
-        if is_request:
-            return "No request content (press tab to view response)", []
-        else:
-            return "No content", []
+    if not data:
+        return "No content", []
    msg = []

+    headers = metadata.get("headers", {})
    enc = headers.get("content-encoding")
    if enc and enc != "identity":
-        decoded = encoding.decode(enc, content)
+        decoded = encoding.decode(enc, data)
        if decoded:
-            content = decoded
+            data = decoded
            msg.append("[decoded %s]" % enc)
    try:
-        ret = viewmode(headers, content, limit)
+        ret = viewmode(data, **metadata)
    # Third-party viewers can fail in unexpected ways...
    except Exception as e:
        six.reraise(
@ -542,7 +552,7 @@ def get_content_view(viewmode, headers, content, limit, is_request):
            sys.exc_info()[2]
        )
    if not ret:
-        ret = get("Raw")(headers, content, limit)
+        ret = get("Raw")(data, **metadata)
        msg.append("Couldn't parse: falling back to Raw")
    else:
        msg.append(ret[0])
--- a/libmproxy/dump.py
+++ b/libmproxy/dump.py
@ -4,11 +4,11 @@ import os
 import traceback

 import click
-
+import itertools

 from netlib.http.semantics import CONTENT_MISSING
 import netlib.utils
-from . import flow, filt, contentview
+from . import flow, filt, contentviews
 from .exceptions import ContentViewException
 from .models import HTTPRequest

@ -57,6 +57,10 @@ class Options(object):
                setattr(self, i, None)


+_contentview_auto = contentviews.get("Auto")
+_contentview_raw = contentviews.get("Raw")
+
+
 class DumpMaster(flow.FlowMaster):
    def __init__(self, server, options, outfile=sys.stdout):
        flow.FlowMaster.__init__(self, server, flow.State())
@ -174,28 +178,24 @@ class DumpMaster(flow.FlowMaster):
            )
            self.echo(headers, indent=4)
        if self.o.flow_detail >= 3:
-            if message.content == CONTENT_MISSING:
+            if message.body == CONTENT_MISSING:
                self.echo("(content missing)", indent=4)
-            elif message.content:
+            elif message.body:
                self.echo("")
-                cutoff = sys.maxsize if self.o.flow_detail >= 4 else contentview.VIEW_CUTOFF
+
                try:
-                    type, lines = contentview.get_content_view(
-                        contentview.get("Auto"),
-                        message.headers,
-                        message.body,
-                        cutoff,
-                        isinstance(message, HTTPRequest)
+                    type, lines = contentviews.get_content_view(
+                        _contentview_auto,
+                         message.body,
+                        headers=message.headers
                    )
                except ContentViewException:
                    s = "Content viewer failed: \n" + traceback.format_exc()
                    self.add_event(s, "debug")
-                    type, lines = contentview.get_content_view(
-                        contentview.get("Raw"),
-                        message.headers,
-                        message.body,
-                        cutoff,
-                        isinstance(message, HTTPRequest)
+                    type, lines = contentviews.get_content_view(
+                        _contentview_raw,
+                         message.body,
+                        headers=message.headers
                    )

                styles = dict(
@ -210,10 +210,18 @@ class DumpMaster(flow.FlowMaster):
                    for (style, text) in line:
                        yield click.style(text, **styles.get(style, {}))

+                if self.o.flow_detail == 3:
+                    lines_to_echo = itertools.islice(lines, contentviews.VIEW_CUTOFF)
+                else:
+                    lines_to_echo = lines
+
                content = "\r\n".join(
-                    "".join(colorful(line)) for line in lines
+                    "".join(colorful(line)) for line in lines_to_echo
                )
+
                self.echo(content)
+                if next(lines, None):
+                    self.echo("(cut off)", indent=4, dim=True)

        if self.o.flow_detail >= 2:
            self.echo("")
--- a/test/test_contentview.py
+++ b/test/test_contentview.py
@ -6,7 +6,7 @@ import sys
 import netlib.utils
 from netlib import encoding

-import libmproxy.contentview as cv
+import libmproxy.contentviews as cv
 import tutils

 try:
@ -21,12 +21,6 @@ except:


 class TestContentView:
-    def test_trailer(self):
-        txt = "X"*10
-        lines = cv.trailer(txt, 1000)
-        assert not list(lines)
-        lines = cv.trailer(txt, 5)
-        assert list(lines)

    def test_view_auto(self):
        v = cv.ViewAuto()
--- a/test/test_dump.py
+++ b/test/test_dump.py
@ -1,6 +1,5 @@
 import os
 from cStringIO import StringIO
-from libmproxy.contentview import ViewAuto
 from libmproxy.exceptions import ContentViewException
 from libmproxy.models import HTTPResponse

@ -51,7 +50,7 @@ def test_strfuncs():
    m.echo_flow(flow)


-@mock.patch("libmproxy.contentview.get_content_view")
+@mock.patch("libmproxy.contentviews.get_content_view")
 def test_contentview(get_content_view):
    get_content_view.side_effect = ContentViewException(""), ("x", [])