mitmproxy/libmproxy/console/contentview.py

import re, cStringIO
import urwid
from PIL import Image
from PIL.ExifTags import TAGS
import lxml.html, lxml.etree
import common
from .. import utils, encoding, flow
from ..contrib import jsbeautifier

# Upper bound on how much of a body the views in this module render. The view
# functions slice their data to this length, and trailer() reports whatever
# lies beyond it.
VIEW_CUTOFF = 1024*100

# Identifiers for the available view modes. VIEW_AUTO is resolved to a
# concrete view by get_view_func().
VIEW_AUTO = 0
VIEW_JSON = 1
VIEW_XML = 2
VIEW_URLENCODED = 3
VIEW_MULTIPART = 4
VIEW_JAVASCRIPT = 5
VIEW_IMAGE = 6
VIEW_RAW = 7
VIEW_HEX = 8
VIEW_HTML = 9

# Display name for each view mode.
VIEW_NAMES = {
    VIEW_AUTO: "Auto",
    VIEW_JSON: "JSON",
    VIEW_XML: "XML",
    VIEW_URLENCODED: "URL-encoded",
    VIEW_MULTIPART: "Multipart Form",
    VIEW_JAVASCRIPT: "JavaScript",
    VIEW_IMAGE: "Image",
    VIEW_RAW: "Raw",
    VIEW_HEX: "Hex",
    VIEW_HTML: "HTML",
}


# (label, shortcut key) pairs, one per view mode. The keys here are the same
# ones that VIEW_SHORTCUTS maps to view modes.
# NOTE(review): presumably consumed by a prompt widget elsewhere - confirm.
VIEW_PROMPT = (
    ("auto detect", "a"),
    ("hex", "e"),
    ("html", "h"),
    ("image", "i"),
    ("javascript", "j"),
    ("json", "s"),
    ("raw", "r"),
    ("multipart", "m"),
    ("urlencoded", "u"),
    ("xml", "x"),
)

# Shortcut key -> view mode.
VIEW_SHORTCUTS = {
    "a": VIEW_AUTO,
    "x": VIEW_XML,
    "h": VIEW_HTML,
    "i": VIEW_IMAGE,
    "j": VIEW_JAVASCRIPT,
    "s": VIEW_JSON,
    "u": VIEW_URLENCODED,
    "m": VIEW_MULTIPART,
    "r": VIEW_RAW,
    "e": VIEW_HEX,
}

# "type/subtype" of a content-type header -> view mode. Consulted by
# get_view_func() when the requested mode is VIEW_AUTO.
CONTENT_TYPES_MAP = {
    "text/html": VIEW_HTML,
    "application/json": VIEW_JSON,
    "text/xml": VIEW_XML,
    "multipart/form-data": VIEW_MULTIPART,
    "application/x-www-form-urlencoded": VIEW_URLENCODED,
    "application/x-javascript": VIEW_JAVASCRIPT,
    "application/javascript": VIEW_JAVASCRIPT,
    "text/javascript": VIEW_JAVASCRIPT,
    "image/png": VIEW_IMAGE,
    "image/jpeg": VIEW_IMAGE,
    "image/gif": VIEW_IMAGE,
    "image/vnd.microsoft.icon": VIEW_IMAGE,
    "image/x-icon": VIEW_IMAGE,
}

def trailer(clen, txt):
    """
        Appends a "data not shown" notice to txt when clen is larger than
        VIEW_CUTOFF.

        txt is a list of widgets that is extended in place; nothing is
        returned. When clen does not exceed the cutoff, txt is left alone.
    """
    hidden = clen - VIEW_CUTOFF
    if hidden <= 0:
        return
    # An empty line separates the notice from the body above it.
    txt.append(urwid.Text(""))
    notice = "... %s of data not shown"%utils.pretty_size(hidden)
    txt.append(urwid.Text([("highlight", notice)]))


def _view_text(content, total):
    """
        Builds the widget list for a chunk of text.

        content is passed through utils.cleanBin and split into lines, each
        of which becomes one urwid.Text. total is the length handed to
        trailer() to decide whether a "data not shown" notice is added.
    """
    body = [
        urwid.Text(("text", line))
        for line in utils.cleanBin(content).splitlines()
    ]
    trailer(total, body)
    return body


def view_raw(hdrs, content):
    """
        Shows the body as plain text, cut off at VIEW_CUTOFF. hdrs is not
        used. Returns a ("Raw", widgets) tuple.
    """
    visible = content[:VIEW_CUTOFF]
    return "Raw", _view_text(visible, len(content))


def view_hex(hdrs, content):
    """
        Shows a hex dump of the body, cut off at VIEW_CUTOFF. hdrs is not
        used. Returns a ("Hex", widgets) tuple with one widget per row
        produced by utils.hexdump.
    """
    rows = [
        urwid.Text([
            ("offset", addr),
            " ",
            ("text", hexbytes),
            "   ",
            ("text", printable),
        ])
        for addr, hexbytes, printable in utils.hexdump(content[:VIEW_CUTOFF])
    ]
    trailer(len(content), rows)
    return "Hex", rows


def view_xml(hdrs, content):
    """
        Pretty-prints an XML body with lxml.

        hdrs is not used. Returns None when content cannot be parsed as XML
        (the parser is created with recover=False), otherwise an
        ("XML-like data", widgets) tuple holding the re-serialised document,
        cut off at VIEW_CUTOFF.
    """
    parser = lxml.etree.XMLParser(
        remove_blank_text=True,
        resolve_entities=False,
        strip_cdata=False,
        recover=False
    )
    try:
        document = lxml.etree.fromstring(content, parser)
    except lxml.etree.XMLSyntaxError:
        return None
    tree = document.getroottree()
    docinfo = tree.docinfo

    # Serialising the root element alone drops whatever precedes it in the
    # document (e.g. comments or processing instructions), so those nodes are
    # collected in document order and emitted together with the doctype.
    prev = []
    p = tree.getroot().getprevious()
    while p is not None:
        prev.insert(
            0,
            lxml.etree.tostring(p)
        )
        p = p.getprevious()
    doctype = docinfo.doctype
    if prev:
        doctype += "\n".join(prev).strip()
    doctype = doctype.strip()

    s = lxml.etree.tostring(
            document,
            pretty_print=True,
            xml_declaration=True,
            doctype=doctype or None,
            encoding=docinfo.encoding
        )

    txt = []
    for i in s[:VIEW_CUTOFF].strip().split("\n"):
        txt.append(
            urwid.Text(("text", i)),
        )
    # The cutoff above is applied to the pretty-printed text, so the amount
    # of hidden data has to be measured against that text as well, not
    # against the original body.
    trailer(len(s), txt)
    return "XML-like data", txt


def view_html(hdrs, content):
    """
        Pretty-prints an HTML body with lxml.

        hdrs is not used. Returns None when utils.isXML rejects the content,
        otherwise an ("HTML", widgets) tuple with the re-serialised document,
        cut off at VIEW_CUTOFF.
    """
    if not utils.isXML(content):
        return None
    parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
    root = lxml.html.fromstring(content, parser=parser)
    doctype = root.getroottree().docinfo.doctype
    pretty = lxml.etree.tostring(root, pretty_print=True, doctype=doctype)
    return "HTML", _view_text(pretty[:VIEW_CUTOFF], len(pretty))


def view_json(hdrs, content):
    """
        Shows a JSON body using the lines produced by utils.pretty_json.

        hdrs is not used. Returns None when pretty_json yields nothing,
        otherwise a ("JSON", widgets) tuple. Lines are added until their
        combined length passes VIEW_CUTOFF; the line that crosses the cutoff
        is still included.
    """
    lines = utils.pretty_json(content)
    if not lines:
        return None
    body = []
    shown = 0
    for line in lines:
        body.append(urwid.Text(("text", line)))
        shown += len(line)
        if shown > VIEW_CUTOFF:
            break
    total = sum(map(len, lines))
    trailer(total, body)
    return "JSON", body


def view_multipart(hdrs, content):
    """
        Shows the named fields of a multipart form body.

        The boundary is taken from the content-type header in hdrs. Returns
        None when that header is missing, cannot be parsed, or carries no
        boundary; otherwise a ("Multipart form", widgets) tuple.

        Parts without a name="..." attribute on their first header line, or
        without a blank line separating headers from body, are skipped.
    """
    v = hdrs.get("content-type")
    if not v:
        return None
    v = utils.parse_content_type(v[0])
    if not v:
        return None
    boundary = v[2].get("boundary")
    if not boundary:
        return None

    rx = re.compile(r'\bname="([^"]+)"')
    keys = []
    vals = []

    for i in content.split("--" + boundary):
        parts = i.splitlines()
        # parts[0] is whatever followed the boundary on the delimiter line; a
        # leading "--" there marks the closing delimiter, which has no fields.
        if len(parts) > 1 and parts[0][0:2] != "--":
            match = rx.search(parts[1])
            if not match:
                continue
            try:
                # The part body starts after the first blank line that
                # follows the header line inspected above.
                blank = parts[2:].index("")
            except ValueError:
                # Malformed part with no header/body separator: skip it
                # rather than letting the exception take down the whole view.
                continue
            keys.append(match.group(1) + ":")
            vals.append(utils.cleanBin(
                "\n".join(parts[3+blank:])
            ))
    r = [
        urwid.Text(("highlight", "Form data:\n")),
    ]
    r.extend(common.format_keyvals(
        zip(keys, vals),
        key = "header",
        val = "text"
    ))
    return "Multipart form", r


def view_urlencoded(hdrs, content):
    """
        Shows a URL-encoded form body as key/value rows.

        hdrs is not used. Returns None when utils.urldecode yields nothing,
        otherwise a ("URLEncoded form", widgets) tuple.
    """
    pairs = utils.urldecode(content)
    if not pairs:
        return None
    rows = []
    for name, value in pairs:
        rows.append((name + ":", value))
    body = common.format_keyvals(rows, key="header", val="text")
    return "URLEncoded form", body


def view_javascript(hdrs, content):
    """
        Shows a JavaScript body re-indented by jsbeautifier.

        hdrs is not used. Only the first VIEW_CUTOFF of the body is passed to
        the beautifier. Returns None when beautifying fails, otherwise a
        ("JavaScript", widgets) tuple.
    """
    opts = jsbeautifier.default_options()
    opts.indent_size = 2
    try:
        res = jsbeautifier.beautify(content[:VIEW_CUTOFF], opts)
    except Exception:
        # Bugs in jsbeautifier mean that it can throw arbitrary errors.
        # Exception (rather than a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating as usual.
        return None
    return "JavaScript", _view_text(res, len(content))


def view_image(hdrs, content):
    """
        Shows metadata for an image body.

        hdrs is not used. Returns None when PIL cannot open the data
        (IOError), otherwise a ("<format> image", widgets) tuple listing the
        format, size and mode, followed by the entries of img.info (except
        "exif") and any EXIF tags, each group sorted by key.
    """
    try:
        img = Image.open(cStringIO.StringIO(content))
    except IOError:
        return None
    parts = [
        ("Format", str(img.format_description)),
        ("Size", "%s x %s px"%img.size),
        ("Mode", str(img.mode)),
    ]
    parts.extend(
        (str(name), str(img.info[name]))
        for name in sorted(img.info.keys())
        if name != "exif"
    )
    # Not every image class provides _getexif.
    exif = img._getexif() if hasattr(img, "_getexif") else None
    if exif:
        for tag_id in sorted(exif.keys()):
            # Fall back to the numeric id for tags PIL has no name for.
            label = TAGS.get(tag_id, tag_id)
            parts.append((str(label), str(exif[tag_id])))
    clean = [
        [utils.cleanBin(label), utils.cleanBin(value)]
        for label, value in parts
    ]
    fmt = common.format_keyvals(clean, key="header", val="text")
    return "%s image"%img.format, fmt


# View mode -> function that renders it. Each function takes (hdrs, content).
# VIEW_AUTO has no entry: get_view_func() resolves it to a concrete mode
# first, and falls back to view_raw for any mode not listed here.
PRETTY_FUNCTION_MAP = {
    VIEW_XML: view_xml,
    VIEW_HTML: view_html,
    VIEW_JSON: view_json,
    VIEW_URLENCODED: view_urlencoded,
    VIEW_MULTIPART: view_multipart,
    VIEW_JAVASCRIPT: view_javascript,
    VIEW_IMAGE: view_image,
    VIEW_HEX: view_hex,
    VIEW_RAW: view_raw,
}

def get_view_func(viewmode, hdrs, content):
    """
        Returns the view function for viewmode.

        An explicit mode is looked up directly in PRETTY_FUNCTION_MAP. For
        VIEW_AUTO the mode is first derived from the content-type header via
        CONTENT_TYPES_MAP and, if that gives nothing, VIEW_XML is chosen when
        utils.isXML accepts the content. Anything unresolved maps to
        view_raw.
    """
    if viewmode != VIEW_AUTO:
        return PRETTY_FUNCTION_MAP.get(viewmode, view_raw)

    values = hdrs.get("content-type")
    ctype = values[0] if values else values
    ct = None
    if ctype:
        ct = utils.parse_content_type(ctype)
    if ct:
        viewmode = CONTENT_TYPES_MAP.get("%s/%s"%(ct[0], ct[1]))
    if not viewmode and utils.isXML(content):
        viewmode = VIEW_XML
    return PRETTY_FUNCTION_MAP.get(viewmode, view_raw)


def get_content_view(viewmode, hdrItems, content):
    """
        Returns a (msg, body) tuple for displaying content.

        hdrItems is an iterable of header pairs. If a content-encoding other
        than "identity" is present and decoding succeeds, the decoded body is
        used and noted in msg. When the selected view returns nothing, the
        body is shown with view_raw and msg says so.
    """
    hdrs = flow.ODictCaseless([list(item) for item in hdrItems])
    notes = []

    enc = hdrs.get("content-encoding")
    if enc and enc[0] != "identity":
        decoded = encoding.decode(enc[0], content)
        if decoded:
            content = decoded
            notes.append("[decoded %s]"%enc[0])

    view = get_view_func(viewmode, hdrs, content)
    ret = view(hdrs, content)
    if ret:
        notes.append(ret[0])
    else:
        ret = view_raw(hdrs, content)
        notes.append("Couldn't parse: falling back to Raw")
    return " ".join(notes), ret[1]
Add a pretty-viewer for images. This shows basic image information like dimensions, plus extracted EXIF tags and other metadata. 2012-03-25 22:26:02 +00:00			`import re, cStringIO`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`import urwid`
Add a pretty-viewer for images. This shows basic image information like dimensions, plus extracted EXIF tags and other metadata. 2012-03-25 22:26:02 +00:00			`from PIL import Image`
			`from PIL.ExifTags import TAGS`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`import lxml.html, lxml.etree`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`import common`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`from .. import utils, encoding, flow`
Pretty view now indents Javascript. Thanks to the JSBeautifier project, which is now included in the contrib directory. 2012-03-24 21:56:45 +00:00			`from ..contrib import jsbeautifier`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00
XML/HTML pretty view tweaks. 2012-04-07 10:15:31 +00:00			`VIEW_CUTOFF = 1024*100`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`VIEW_AUTO = 0`
			`VIEW_JSON = 1`
			`VIEW_XML = 2`
			`VIEW_URLENCODED = 3`
			`VIEW_MULTIPART = 4`
			`VIEW_JAVASCRIPT = 5`
			`VIEW_IMAGE = 6`
			`VIEW_RAW = 7`
			`VIEW_HEX = 8`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`VIEW_HTML = 9`
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00
			`VIEW_NAMES = {`
			`VIEW_AUTO: "Auto",`
			`VIEW_JSON: "JSON",`
			`VIEW_XML: "XML",`
			`VIEW_URLENCODED: "URL-encoded",`
			`VIEW_MULTIPART: "Multipart Form",`
			`VIEW_JAVASCRIPT: "JavaScript",`
			`VIEW_IMAGE: "Image",`
			`VIEW_RAW: "Raw",`
			`VIEW_HEX: "Hex",`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`VIEW_HTML: "HTML",`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`}`

Move "M" shortcut key to global scope. You now don't have to be viewing a flow to change global default view. 2012-04-01 22:49:57 +00:00
			`VIEW_PROMPT = (`
			`("auto detect", "a"),`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`("hex", "e"),`
			`("html", "h"),`
Move "M" shortcut key to global scope. You now don't have to be viewing a flow to change global default view. 2012-04-01 22:49:57 +00:00			`("image", "i"),`
			`("javascript", "j"),`
			`("json", "s"),`
			`("raw", "r"),`
			`("multipart", "m"),`
			`("urlencoded", "u"),`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`("xml", "x"),`
Move "M" shortcut key to global scope. You now don't have to be viewing a flow to change global default view. 2012-04-01 22:49:57 +00:00			`)`

Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`VIEW_SHORTCUTS = {`
			`"a": VIEW_AUTO,`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`"x": VIEW_XML,`
			`"h": VIEW_HTML,`
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`"i": VIEW_IMAGE,`
			`"j": VIEW_JAVASCRIPT,`
			`"s": VIEW_JSON,`
			`"u": VIEW_URLENCODED,`
Move "M" shortcut key to global scope. You now don't have to be viewing a flow to change global default view. 2012-04-01 22:49:57 +00:00			`"m": VIEW_MULTIPART,`
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`"r": VIEW_RAW,`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`"e": VIEW_HEX,`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`}`

Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`CONTENT_TYPES_MAP = {`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`"text/html": VIEW_HTML,`
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`"application/json": VIEW_JSON,`
			`"text/xml": VIEW_XML,`
			`"multipart/form-data": VIEW_MULTIPART,`
			`"application/x-www-form-urlencoded": VIEW_URLENCODED,`
			`"application/x-javascript": VIEW_JAVASCRIPT,`
			`"application/javascript": VIEW_JAVASCRIPT,`
			`"text/javascript": VIEW_JAVASCRIPT,`
			`"image/png": VIEW_IMAGE,`
			`"image/jpeg": VIEW_IMAGE,`
			`"image/gif": VIEW_IMAGE,`
			`"image/vnd.microsoft.icon": VIEW_IMAGE,`
			`"image/x-icon": VIEW_IMAGE,`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`}`

			`def trailer(clen, txt):`
			`rem = clen - VIEW_CUTOFF`
			`if rem > 0:`
			`txt.append(urwid.Text(""))`
			`txt.append(`
			`urwid.Text(`
			`[`
			`("highlight", "... %s of data not shown"%utils.pretty_size(rem))`
			`]`
			`)`
			`)`

Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00
Tune content viewing to maintain responsiveness: - Reduce display cutoff to 20k. - Make sure that we only indent the visible part of a JS body, not the whole thing. 2012-03-31 22:09:25 +00:00			`def _view_text(content, total):`
Pretty view now indents Javascript. Thanks to the JSBeautifier project, which is now included in the contrib directory. 2012-03-24 21:56:45 +00:00			`"""`
			`Generates a body for a chunk of text.`
			`"""`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`txt = []`
Tune content viewing to maintain responsiveness: - Reduce display cutoff to 20k. - Make sure that we only indent the visible part of a JS body, not the whole thing. 2012-03-31 22:09:25 +00:00			`for i in utils.cleanBin(content).splitlines():`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`txt.append(`
			`urwid.Text(("text", i))`
			`)`
Tune content viewing to maintain responsiveness: - Reduce display cutoff to 20k. - Make sure that we only indent the visible part of a JS body, not the whole thing. 2012-03-31 22:09:25 +00:00			`trailer(total, txt)`
Pretty view now indents Javascript. Thanks to the JSBeautifier project, which is now included in the contrib directory. 2012-03-24 21:56:45 +00:00			`return txt`


			`def view_raw(hdrs, content):`
Tune content viewing to maintain responsiveness: - Reduce display cutoff to 20k. - Make sure that we only indent the visible part of a JS body, not the whole thing. 2012-03-31 22:09:25 +00:00			`txt = _view_text(content[:VIEW_CUTOFF], len(content))`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`return "Raw", txt`

Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`def view_hex(hdrs, content):`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`txt = []`
			`for offset, hexa, s in utils.hexdump(content[:VIEW_CUTOFF]):`
			`txt.append(urwid.Text([`
			`("offset", offset),`
			`" ",`
			`("text", hexa),`
			`" ",`
			`("text", s),`
			`]))`
			`trailer(len(content), txt)`
Make it clearer when we fall back to Raw. 2012-04-01 23:22:01 +00:00			`return "Hex", txt`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`def view_xml(hdrs, content):`
			`parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False)`
			`try:`
			`document = lxml.etree.fromstring(content, parser)`
			`except lxml.etree.XMLSyntaxError, v:`
			`return None`
			`docinfo = document.getroottree().docinfo`

			`prev = []`
			`p = document.getroottree().getroot().getprevious()`
			`while p is not None:`
			`prev.insert(`
			`0,`
			`lxml.etree.tostring(p)`
			`)`
			`p = p.getprevious()`
XML/HTML pretty view tweaks. 2012-04-07 10:15:31 +00:00			`doctype=docinfo.doctype`
			`if prev:`
			`doctype += "\n".join(prev).strip()`
			`doctype = doctype.strip()`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00
			`s = lxml.etree.tostring(`
			`document,`
			`pretty_print=True,`
			`xml_declaration=True,`
XML/HTML pretty view tweaks. 2012-04-07 10:15:31 +00:00			`doctype=doctype or None,`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`encoding = docinfo.encoding`
			`)`

Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`txt = []`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`for i in s[:VIEW_CUTOFF].strip().split("\n"):`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`txt.append(`
			`urwid.Text(("text", i)),`
			`)`
			`trailer(len(content), txt)`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`return "XML-like data", txt`


Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`def view_html(hdrs, content):`
			`if utils.isXML(content):`
			`parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)`
			`d = lxml.html.fromstring(content, parser=parser)`
			`docinfo = d.getroottree().docinfo`
			`s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype)`
XML/HTML pretty view tweaks. 2012-04-07 10:15:31 +00:00			`return "HTML", _view_text(s[:VIEW_CUTOFF], len(s))`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00

Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`def view_json(hdrs, content):`
			`lines = utils.pretty_json(content)`
			`if lines:`
			`txt = []`
			`sofar = 0`
			`for i in lines:`
			`sofar += len(i)`
			`txt.append(`
			`urwid.Text(("text", i)),`
			`)`
			`if sofar > VIEW_CUTOFF:`
			`break`
			`trailer(sum(len(i) for i in lines), txt)`
			`return "JSON", txt`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00

Re-enable simple multipart form parsing and preview. 2012-03-24 21:10:48 +00:00			`def view_multipart(hdrs, content):`
			`v = hdrs.get("content-type")`
			`if v:`
			`v = utils.parse_content_type(v[0])`
			`if not v:`
			`return`
			`boundary = v[2].get("boundary")`
			`if not boundary:`
			`return`

			`rx = re.compile(r'\bname="([^"]+)"')`
			`keys = []`
			`vals = []`

			`for i in content.split("--" + boundary):`
			`parts = i.splitlines()`
			`if len(parts) > 1 and parts[0][0:2] != "--":`
			`match = rx.search(parts[1])`
			`if match:`
			`keys.append(match.group(1) + ":")`
			`vals.append(utils.cleanBin(`
			`"\n".join(parts[3+parts[2:].index(""):])`
			`))`
			`r = [`
			`urwid.Text(("highlight", "Form data:\n")),`
			`]`
			`r.extend(common.format_keyvals(`
			`zip(keys, vals),`
			`key = "header",`
			`val = "text"`
			`))`
			`return "Multipart form", r`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00

Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`def view_urlencoded(hdrs, content):`
			`lines = utils.urldecode(content)`
			`if lines:`
			`body = common.format_keyvals(`
			`[(k+":", v) for (k, v) in lines],`
			`key = "header",`
			`val = "text"`
			`)`
			`return "URLEncoded form", body`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00

Pretty view now indents Javascript. Thanks to the JSBeautifier project, which is now included in the contrib directory. 2012-03-24 21:56:45 +00:00			`def view_javascript(hdrs, content):`
			`opts = jsbeautifier.default_options()`
			`opts.indent_size = 2`
Catch all errors when using jsbeautifier. Turns out there are some problems that can raise arbitrary exceptions. 2012-04-08 02:40:59 +00:00			`try:`
			`res = jsbeautifier.beautify(content[:VIEW_CUTOFF], opts)`
			`except:`
			`# Bugs in jsbeautifier mean that it can trhow arbitrary errors.`
			`return None`
Tune content viewing to maintain responsiveness: - Reduce display cutoff to 20k. - Make sure that we only indent the visible part of a JS body, not the whole thing. 2012-03-31 22:09:25 +00:00			`return "JavaScript", _view_text(res, len(content))`
Pretty view now indents Javascript. Thanks to the JSBeautifier project, which is now included in the contrib directory. 2012-03-24 21:56:45 +00:00

Add a pretty-viewer for images. This shows basic image information like dimensions, plus extracted EXIF tags and other metadata. 2012-03-25 22:26:02 +00:00			`def view_image(hdrs, content):`
			`try:`
			`img = Image.open(cStringIO.StringIO(content))`
			`except IOError:`
			`return None`
			`parts = [`
			`("Format", str(img.format_description)),`
			`("Size", "%s x %s px"%img.size),`
			`("Mode", str(img.mode)),`
			`]`
			`for i in sorted(img.info.keys()):`
			`if i != "exif":`
			`parts.append(`
			`(str(i), str(img.info[i]))`
			`)`
			`if hasattr(img, "_getexif"):`
			`ex = img._getexif()`
			`if ex:`
			`for i in sorted(ex.keys()):`
			`tag = TAGS.get(i, i)`
			`parts.append(`
			`(str(tag), str(ex[i]))`
			`)`
			`clean = []`
			`for i in parts:`
			`clean.append([utils.cleanBin(i[0]), utils.cleanBin(i[1])])`
			`fmt = common.format_keyvals(`
			`clean,`
			`key = "header",`
			`val = "text"`
			`)`
			`return "%s image"%img.format, fmt`


Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`PRETTY_FUNCTION_MAP = {`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`VIEW_XML: view_xml,`
			`VIEW_HTML: view_html,`
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`VIEW_JSON: view_json,`
			`VIEW_URLENCODED: view_urlencoded,`
			`VIEW_MULTIPART: view_multipart,`
			`VIEW_JAVASCRIPT: view_javascript,`
			`VIEW_IMAGE: view_image,`
			`VIEW_HEX: view_hex,`
			`VIEW_RAW: view_raw,`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`}`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`def get_view_func(viewmode, hdrs, content):`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`"""`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`Returns a function object.`
Factor out content view apparatus into contentview.py 2012-03-23 22:21:58 +00:00			`"""`
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`if viewmode == VIEW_AUTO:`
			`ctype = hdrs.get("content-type")`
			`if ctype:`
			`ctype = ctype[0]`
			`ct = utils.parse_content_type(ctype) if ctype else None`
			`if ct:`
			`viewmode = CONTENT_TYPES_MAP.get("%s/%s"%(ct[0], ct[1]))`
			`if not viewmode and utils.isXML(content):`
			`viewmode = VIEW_XML`
			`return PRETTY_FUNCTION_MAP.get(viewmode, view_raw)`


			`def get_content_view(viewmode, hdrItems, content):`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`"""`
			`Returns a (msg, body) tuple.`
			`"""`
			`msg = []`

			`hdrs = flow.ODictCaseless([list(i) for i in hdrItems])`

			`enc = hdrs.get("content-encoding")`
			`if enc and enc[0] != "identity":`
			`decoded = encoding.decode(enc[0], content)`
			`if decoded:`
			`content = decoded`
			`msg.append("[decoded %s]"%enc[0])`
Start rationalizing content views. We now no longer have distinction between "pretty" view and hex/raw. Instead, we simply a default AUTO view with a global override (M) and a local override (m). 2012-04-01 22:30:35 +00:00			`func = get_view_func(viewmode, hdrs, content)`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`ret = func(hdrs, content)`
			`if not ret:`
Make it clearer when we fall back to Raw. 2012-04-01 23:22:01 +00:00			`viewmode = VIEW_RAW`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`ret = view_raw(hdrs, content)`
Integrate lxml for pretty-printing HTML and XML. Tackling the pretty-printing performance problem head-on, at the cost of a major dependency. 2012-04-07 01:47:03 +00:00			`msg.append("Couldn't parse: falling back to Raw")`
Make it clearer when we fall back to Raw. 2012-04-01 23:22:01 +00:00			`else:`
			`msg.append(ret[0])`
Refactor pretty view mechanism. Also start adding unit tests for this subsystem. 2012-03-24 01:02:41 +00:00			`return " ".join(msg), ret[1]`