From 4b4a18a2e4d7cf3e8862192b68f5a2295da9acbe Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 21 Jul 2014 21:06:55 +0200 Subject: [PATCH] add --stream options, various fixes --- examples/stream.py | 5 ++ examples/stub.py | 9 +++ libmproxy/cmdline.py | 100 +++++++++++++++++----------------- libmproxy/console/__init__.py | 3 + libmproxy/dump.py | 14 ++++- libmproxy/flow.py | 22 ++++++++ libmproxy/protocol/http.py | 19 ++++--- 7 files changed, 111 insertions(+), 61 deletions(-) create mode 100644 examples/stream.py diff --git a/examples/stream.py b/examples/stream.py new file mode 100644 index 000000000..f9f03c3ea --- /dev/null +++ b/examples/stream.py @@ -0,0 +1,5 @@ +def responseheaders(ctx, flow): + """ + Enables streaming for all responses. + """ + flow.response.stream = True \ No newline at end of file diff --git a/examples/stub.py b/examples/stub.py index 78cbfcf2a..0cf67db7c 100644 --- a/examples/stub.py +++ b/examples/stub.py @@ -27,6 +27,15 @@ def request(ctx, flow): """ ctx.log("request") + +def responseheaders(ctx, flow): + """ + Called when the response headers for a server response have been received, + but the response body has not been processed yet. Can be used to tell mitmproxy + to stream the response. + """ + ctx.log("responseheaders") + def response(ctx, flow): """ Called when a server response has been received. diff --git a/libmproxy/cmdline.py b/libmproxy/cmdline.py index e407cd102..c9c0c75ef 100644 --- a/libmproxy/cmdline.py +++ b/libmproxy/cmdline.py @@ -3,7 +3,7 @@ import re import argparse from argparse import ArgumentTypeError from netlib import http -from . import proxy, filt +from . import filt, utils from .proxy import config APP_HOST = "mitm.it" @@ -23,13 +23,13 @@ def _parse_hook(s): elif len(parts) == 3: patt, a, b = parts else: - raise ParseException("Malformed hook specifier - too few clauses: %s"%s) + raise ParseException("Malformed hook specifier - too few clauses: %s" % s) if not a: - raise ParseException("Empty clause: %s"%str(patt)) + raise ParseException("Empty clause: %s" % str(patt)) if not filt.parse(patt): - raise ParseException("Malformed filter pattern: %s"%patt) + raise ParseException("Malformed filter pattern: %s" % patt) return patt, a, b @@ -64,7 +64,7 @@ def parse_replace_hook(s): try: re.compile(regex) except re.error, e: - raise ParseException("Malformed replacement regex: %s"%str(e.message)) + raise ParseException("Malformed replacement regex: %s" % str(e.message)) return patt, regex, replacement @@ -98,7 +98,6 @@ def parse_setheader(s): def parse_server_spec(url): - normalized_url = re.sub("^https?2", "", url) p = http.parse_url(normalized_url) @@ -125,6 +124,8 @@ def get_common_options(options): if options.stickyauth_filt: stickyauth = options.stickyauth_filt + stream_large_bodies = utils.parse_size(options.stream_large_bodies) + reps = [] for i in options.replace: try: @@ -140,10 +141,9 @@ def get_common_options(options): try: v = open(path, "rb").read() except IOError, e: - raise ArgumentTypeError("Could not read replace file: %s"%path) + raise ArgumentTypeError("Could not read replace file: %s" % path) reps.append((patt, rex, v)) - setheaders = [] for i in options.setheader: try: @@ -153,29 +153,30 @@ def get_common_options(options): setheaders.append(p) return dict( - app = options.app, - app_host = options.app_host, - app_port = options.app_port, - app_external = options.app_external, + app=options.app, + app_host=options.app_host, + app_port=options.app_port, + app_external=options.app_external, - anticache = options.anticache, - anticomp = options.anticomp, - client_replay = options.client_replay, - kill = options.kill, - no_server = options.no_server, - refresh_server_playback = not options.norefresh, - rheaders = options.rheaders, - rfile = options.rfile, - replacements = reps, - setheaders = setheaders, - server_replay = options.server_replay, - scripts = options.scripts, - stickycookie = stickycookie, - stickyauth = stickyauth, - showhost = options.showhost, - wfile = options.wfile, - verbosity = options.verbose, - nopop = options.nopop, + anticache=options.anticache, + anticomp=options.anticomp, + client_replay=options.client_replay, + kill=options.kill, + no_server=options.no_server, + refresh_server_playback=not options.norefresh, + rheaders=options.rheaders, + rfile=options.rfile, + replacements=reps, + setheaders=setheaders, + server_replay=options.server_replay, + scripts=options.scripts, + stickycookie=stickycookie, + stickyauth=stickyauth, + stream_large_bodies=stream_large_bodies, + showhost=options.showhost, + wfile=options.wfile, + verbosity=options.verbose, + nopop=options.nopop, ) @@ -187,8 +188,8 @@ def common_options(parser): ) parser.add_argument( "--confdir", - action="store", type = str, dest="confdir", default='~/.mitmproxy', - help = "Configuration directory. (~/.mitmproxy)" + action="store", type=str, dest="confdir", default='~/.mitmproxy', + help="Configuration directory. (~/.mitmproxy)" ) parser.add_argument( "--host", @@ -240,10 +241,16 @@ def common_options(parser): "-Z", action="store", dest="body_size_limit", default=None, metavar="SIZE", - help="Byte size limit of HTTP request and response bodies."\ + help="Byte size limit of HTTP request and response bodies." \ " Understands k/m/g suffixes, i.e. 3m for 3 megabytes." ) - + parser.add_argument( + "--stream", + action="store", dest="stream_large_bodies", default=None, + metavar="SIZE", + help="Stream data to the client if response body exceeds the given threshold. " + "If streamed, the body will not be stored in any way. Understands k/m/g suffixes, i.e. 3m for 3 megabytes." + ) group = parser.add_argument_group("Proxy Options") # We could make a mutually exclusive group out of -R, -U, -T, but we don't do that because @@ -251,8 +258,8 @@ def common_options(parser): # - our own error messages are more helpful group.add_argument( "-b", - action="store", type = str, dest="addr", default='', - help = "Address to bind proxy to (defaults to all interfaces)" + action="store", type=str, dest="addr", default='', + help="Address to bind proxy to (defaults to all interfaces)" ) group.add_argument( "-U", @@ -266,8 +273,8 @@ def common_options(parser): ) group.add_argument( "-p", - action="store", type = int, dest="port", default=8080, - help = "Proxy service port." + action="store", type=int, dest="port", default=8080, + help="Proxy service port." ) group.add_argument( "-R", @@ -280,7 +287,6 @@ def common_options(parser): help="Set transparent proxy mode." ) - group = parser.add_argument_group( "Advanced Proxy Options", """ @@ -304,7 +310,6 @@ def common_options(parser): help="Override the destination server all requests are sent to: http[s][2http[s]]://host[:port]" ) - group = parser.add_argument_group("Web App") group.add_argument( "-a", @@ -315,7 +320,7 @@ def common_options(parser): "--app-host", action="store", dest="app_host", default=APP_HOST, metavar="host", help="Domain to serve the app from. For transparent mode, use an IP when\ - a DNS entry for the app domain is not present. Default: %s"%APP_HOST + a DNS entry for the app domain is not present. Default: %s" % APP_HOST ) group.add_argument( @@ -329,7 +334,6 @@ def common_options(parser): help="Serve the app outside of the proxy." ) - group = parser.add_argument_group("Client Replay") group.add_argument( "-c", @@ -352,22 +356,21 @@ def common_options(parser): "--rheader", action="append", dest="rheaders", type=str, help="Request headers to be considered during replay. " - "Can be passed multiple times." + "Can be passed multiple times." ) group.add_argument( "--norefresh", action="store_true", dest="norefresh", default=False, - help= "Disable response refresh, " - "which updates times in cookies and headers for replayed responses." + help="Disable response refresh, " + "which updates times in cookies and headers for replayed responses." ) group.add_argument( "--no-pop", action="store_true", dest="nopop", default=False, help="Disable response pop from response flow. " - "This makes it possible to replay same response multiple times." + "This makes it possible to replay same response multiple times." ) - group = parser.add_argument_group( "Replacements", """ @@ -389,7 +392,6 @@ def common_options(parser): help="Replacement pattern, where the replacement clause is a path to a file." ) - group = parser.add_argument_group( "Set Headers", """ @@ -405,7 +407,6 @@ def common_options(parser): help="Header set pattern." ) - group = parser.add_argument_group( "Proxy Authentication", """ @@ -434,5 +435,4 @@ def common_options(parser): help="Allow access to users specified in an Apache htpasswd file." ) - config.ssl_option_group(parser) diff --git a/libmproxy/console/__init__.py b/libmproxy/console/__init__.py index e660f312b..073e08825 100644 --- a/libmproxy/console/__init__.py +++ b/libmproxy/console/__init__.py @@ -343,6 +343,7 @@ class Options(object): "server_replay", "stickycookie", "stickyauth", + "stream_large_bodies", "verbosity", "wfile", "nopop", @@ -391,6 +392,8 @@ class ConsoleMaster(flow.FlowMaster): print >> sys.stderr, "Sticky auth error:", r sys.exit(1) + self.set_stream_large_bodies(options.stream_large_bodies) + self.refresh_server_playback = options.refresh_server_playback self.anticache = options.anticache self.anticomp = options.anticomp diff --git a/libmproxy/dump.py b/libmproxy/dump.py index 1f1072416..aeb34cc32 100644 --- a/libmproxy/dump.py +++ b/libmproxy/dump.py @@ -2,6 +2,7 @@ from __future__ import absolute_import import sys, os import netlib.utils from . import flow, filt, utils +from .protocol import http class DumpError(Exception): pass @@ -30,6 +31,7 @@ class Options(object): "showhost", "stickycookie", "stickyauth", + "stream_large_bodies", "verbosity", "wfile", ] @@ -69,6 +71,8 @@ class DumpMaster(flow.FlowMaster): self.showhost = options.showhost self.refresh_server_playback = options.refresh_server_playback + self.set_stream_large_bodies(options.stream_large_bodies) + if filtstr: self.filt = filt.parse(filtstr) else: @@ -80,6 +84,7 @@ class DumpMaster(flow.FlowMaster): if options.stickyauth: self.set_stickyauth(options.stickyauth) + if options.wfile: path = os.path.expanduser(options.wfile) try: @@ -157,12 +162,17 @@ class DumpMaster(flow.FlowMaster): if f.response: if self.o.flow_detail > 0: - sz = utils.pretty_size(len(f.response.content)) + if f.response.content == http.CONTENT_MISSING: + sz = "(content missing)" + else: + sz = utils.pretty_size(len(f.response.content)) result = " << %s %s"%(str_response(f.response), sz) if self.o.flow_detail > 1: result = result + "\n\n" + self.indent(4, f.response.headers) if self.o.flow_detail > 2: - if utils.isBin(f.response.content): + if f.response.content == http.CONTENT_MISSING: + cont = self.indent(4, "(content missing)") + elif utils.isBin(f.response.content): d = netlib.utils.hexdump(f.response.content) d = "\n".join("%s\t%s %s"%i for i in d) cont = self.indent(4, d) diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 550977569..6b751bc9e 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -145,6 +145,17 @@ class SetHeaders: f.request.headers.add(header, value) +class StreamLargeBodies(object): + def __init__(self, max_size): + self.max_size = max_size + + def run(self, flow, is_request): + r = flow.request if is_request else flow.response + code = flow.response.code if flow.response else None + expected_size = netlib.http.expected_http_body_size(r.headers, is_request, flow.request.method, code) + if not (0 <= expected_size <= self.max_size): + r.stream = True + class ClientPlaybackState: def __init__(self, flows, exit): self.flows, self.exit = flows, exit @@ -437,6 +448,7 @@ class FlowMaster(controller.Master): self.anticache = False self.anticomp = False + self.stream_large_bodies = False self.refresh_server_playback = False self.replacehooks = ReplaceHooks() self.setheaders = SetHeaders() @@ -522,6 +534,12 @@ class FlowMaster(controller.Master): self.stickycookie_state = None self.stickycookie_txt = None + def set_stream_large_bodies(self, max_size): + if max_size is not None: + self.stream_large_bodies = StreamLargeBodies(max_size) + else: + self.stream_large_bodies = False + def set_stickyauth(self, txt): if txt: flt = filt.parse(txt) @@ -708,6 +726,10 @@ class FlowMaster(controller.Master): def handle_responseheaders(self, f): self.run_script_hook("responseheaders", f) + + if self.stream_large_bodies: + self.stream_large_bodies.run(f, False) + f.reply() return f diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 31dd39f59..4648c7cf1 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -924,7 +924,9 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): self.c.channel.ask("responseheaders", flow) # now get the rest of the request body, if body still needs to be read but not streaming this response - if not flow.response.stream and flow.response.content is None: + if flow.response.stream: + flow.response.content = CONTENT_MISSING + else: flow.response.content = http.read_http_body(self.c.server_conn.rfile, flow.response.headers, self.c.config.body_size_limit, flow.request.method, flow.response.code, False) @@ -937,20 +939,19 @@ class HTTPHandler(ProtocolHandler, TemporaryServerChangeMixin): if response_reply is None or response_reply == KILL: return False - if flow.response.content is not None: - # if not streaming or there is no body to be read, we'll already have the body, just send it + if not flow.response.stream: + # no streaming: + # we already received the full response from the server and can send it to the client straight away. self.c.client_conn.send(flow.response._assemble()) else: - - # if streaming, we still need to read the body and stream its bits back to the client - - # start with head + # streaming: + # First send the body and then transfer the response incrementally: h = flow.response._assemble_head(preserve_transfer_encoding=True) self.c.client_conn.send(h) - for chunk in http.read_http_body_chunked(self.c.server_conn.rfile, flow.response.headers, - self.c.config.body_size_limit, "GET", 200, False, 4096): + self.c.config.body_size_limit, flow.request.method, + flow.response.code, False, 4096): for part in chunk: self.c.client_conn.wfile.write(part) self.c.client_conn.wfile.flush()