From eb2264e91a7fef4170eade4bc6af9c0c4fe9694a Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 12 Sep 2015 17:10:38 +0200 Subject: [PATCH] improve display of non-ascii contents fixes #283 --- libmproxy/contentviews.py | 30 ++++++++++++++++++++++++------ libmproxy/dump.py | 29 ++++++++++++++--------------- libmproxy/protocol/http.py | 17 ++++++++++++----- libmproxy/protocol/rawtcp.py | 4 ++-- libmproxy/utils.py | 13 +++++++------ 5 files changed, 59 insertions(+), 34 deletions(-) diff --git a/libmproxy/contentviews.py b/libmproxy/contentviews.py index a356b35de..9af080335 100644 --- a/libmproxy/contentviews.py +++ b/libmproxy/contentviews.py @@ -27,7 +27,7 @@ import six from netlib.odict import ODict from netlib import encoding -import netlib.utils +from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type from . import utils from .exceptions import ContentViewException @@ -121,12 +121,14 @@ class ViewAuto(View): headers = metadata.get("headers", {}) ctype = headers.get("content-type") if ctype: - ct = netlib.utils.parse_content_type(ctype) if ctype else None + ct = parse_content_type(ctype) if ctype else None ct = "%s/%s" % (ct[0], ct[1]) if ct in content_types_map: return content_types_map[ct][0](data, **metadata) elif utils.isXML(data): return get("XML")(data, **metadata) + if utils.isMostlyBin(data): + return get("Hex")(data) return get("Raw")(data) @@ -146,7 +148,7 @@ class ViewHex(View): @staticmethod def _format(data): - for offset, hexa, s in netlib.utils.hexdump(data): + for offset, hexa, s in hexdump(data): yield [ ("offset", offset + " "), ("text", hexa + " "), @@ -251,7 +253,7 @@ class ViewURLEncoded(View): content_types = ["application/x-www-form-urlencoded"] def __call__(self, data, **metadata): - d = netlib.utils.urldecode(data) + d = urldecode(data) return "URLEncoded form", format_dict(ODict(d)) @@ -268,7 +270,7 @@ class ViewMultipart(View): def __call__(self, data, **metadata): headers = metadata.get("headers", {}) - v = netlib.utils.multipartdecode(headers, data) + v = multipartdecode(headers, data) if v: return "Multipart form", self._format(v) @@ -519,6 +521,21 @@ def get(name): return i +def safe_to_print(lines, encoding="utf8"): + """ + Wraps a content generator so that each text portion is a *safe to print* unicode string. + """ + for line in lines: + clean_line = [] + for (style, text) in line: + try: + text = clean_bin(text.decode(encoding, "strict")) + except UnicodeDecodeError: + text = clean_bin(text).decode(encoding, "strict") + clean_line.append((style, text)) + yield clean_line + + def get_content_view(viewmode, data, **metadata): """ Args: @@ -527,6 +544,7 @@ def get_content_view(viewmode, data, **metadata): Returns: A (description, content generator) tuple. + In contrast to calling the views directly, text is always safe-to-print unicode. Raises: ContentViewException, if the content view threw an error. @@ -556,4 +574,4 @@ def get_content_view(viewmode, data, **metadata): msg.append("Couldn't parse: falling back to Raw") else: msg.append(ret[0]) - return " ".join(msg), ret[1] + return " ".join(msg), safe_to_print(ret[1]) diff --git a/libmproxy/dump.py b/libmproxy/dump.py index d477e0329..9fc9e1b8b 100644 --- a/libmproxy/dump.py +++ b/libmproxy/dump.py @@ -57,12 +57,8 @@ class Options(object): setattr(self, i, None) -_contentview_auto = contentviews.get("Auto") -_contentview_raw = contentviews.get("Raw") - - class DumpMaster(flow.FlowMaster): - def __init__(self, server, options, outfile=sys.stdout): + def __init__(self, server, options, outfile=None): flow.FlowMaster.__init__(self, server, flow.State()) self.outfile = outfile self.o = options @@ -91,7 +87,7 @@ class DumpMaster(flow.FlowMaster): if options.outfile: path = os.path.expanduser(options.outfile[0]) try: - f = file(path, options.outfile[1]) + f = open(path, options.outfile[1]) self.start_stream(f, self.filt) except IOError as v: raise DumpError(v.strerror) @@ -185,16 +181,16 @@ class DumpMaster(flow.FlowMaster): try: type, lines = contentviews.get_content_view( - _contentview_auto, - message.body, + contentviews.get("Auto"), + message.body, headers=message.headers ) except ContentViewException: s = "Content viewer failed: \n" + traceback.format_exc() self.add_event(s, "debug") type, lines = contentviews.get_content_view( - _contentview_raw, - message.body, + contentviews.get("Raw"), + message.body, headers=message.headers ) @@ -206,17 +202,19 @@ class DumpMaster(flow.FlowMaster): ) def colorful(line): - yield " " # we can already indent here + yield u" " # we can already indent here for (style, text) in line: yield click.style(text, **styles.get(style, {})) if self.o.flow_detail == 3: - lines_to_echo = itertools.islice(lines, contentviews.VIEW_CUTOFF) + lines_to_echo = itertools.islice(lines, 70) else: lines_to_echo = lines - content = "\r\n".join( - "".join(colorful(line)) for line in lines_to_echo + lines_to_echo = list(lines_to_echo) + + content = u"\r\n".join( + u"".join(colorful(line)) for line in lines_to_echo ) self.echo(content) @@ -302,7 +300,8 @@ class DumpMaster(flow.FlowMaster): if f.error: self.echo(" << {}".format(f.error.msg), bold=True, fg="red") - self.outfile.flush() + if self.outfile: + self.outfile.flush() def _process_flow(self, f): self.state.delete_flow(f) diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 3a4153201..230f2be97 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -1,6 +1,7 @@ from __future__ import (absolute_import, print_function, division) import itertools import sys +import traceback import six @@ -384,9 +385,13 @@ class HttpLayer(Layer): return except (HttpErrorConnClosed, NetLibError, HttpError, ProtocolException) as e: + error_propagated = False if flow.request and not flow.response: - flow.error = Error(repr(e)) + flow.error = Error(str(e)) self.channel.ask("error", flow) + self.log(traceback.format_exc(), "debug") + error_propagated = True + try: self.send_response(make_error_response( getattr(e, "code", 502), @@ -394,10 +399,12 @@ class HttpLayer(Layer): )) except NetLibError: pass - if isinstance(e, ProtocolException): - six.reraise(ProtocolException, e, sys.exc_info()[2]) - else: - six.reraise(ProtocolException, ProtocolException("Error in HTTP connection: %s" % repr(e)), sys.exc_info()[2]) + + if not error_propagated: + if isinstance(e, ProtocolException): + six.reraise(ProtocolException, e, sys.exc_info()[2]) + else: + six.reraise(ProtocolException, ProtocolException("Error in HTTP connection: %s" % repr(e)), sys.exc_info()[2]) finally: flow.live = False diff --git a/libmproxy/protocol/rawtcp.py b/libmproxy/protocol/rawtcp.py index 9b155412a..24c195236 100644 --- a/libmproxy/protocol/rawtcp.py +++ b/libmproxy/protocol/rawtcp.py @@ -7,7 +7,7 @@ import sys from OpenSSL import SSL from netlib.tcp import NetLibError, ssl_read_select -from netlib.utils import cleanBin +from netlib.utils import clean_bin from ..exceptions import ProtocolException from .base import Layer @@ -58,7 +58,7 @@ class RawTCPLayer(Layer): direction = "-> tcp -> {}".format(repr(self.server_conn.address)) else: direction = "<- tcp <- {}".format(repr(self.server_conn.address)) - data = cleanBin(buf[:size].tobytes()) + data = clean_bin(buf[:size].tobytes()) self.log( "{}\r\n{}".format(direction, data), "info" diff --git a/libmproxy/utils.py b/libmproxy/utils.py index 4b5912506..8bd843a0b 100644 --- a/libmproxy/utils.py +++ b/libmproxy/utils.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import (absolute_import, print_function, division) import os import datetime import re @@ -30,15 +30,16 @@ def isBin(s): """ for i in s: i = ord(i) - if i < 9: - return True - elif i > 13 and i < 32: - return True - elif i > 126: + if i < 9 or 13 < i < 32 or 126 < i: return True return False +def isMostlyBin(s): + s = s[:100] + return sum(isBin(ch) for ch in s)/len(s) > 0.3 + + def isXML(s): for i in s: if i in "\n \t":