From 6dc0f105ccabeb10f557dc8baa51d3ce08b3c8ee Mon Sep 17 00:00:00 2001 From: alts Date: Sat, 16 Jul 2011 02:47:06 -0700 Subject: [PATCH] Adds support for content encoding, namely gzip and deflate --- libmproxy/cmdline.py | 2 +- libmproxy/console.py | 40 +++++++++++++++++++++++++--------------- libmproxy/encoding.py | 43 +++++++++++++++++++++++++++++++++++++++++++ libmproxy/flow.py | 9 ++++++--- libmproxy/proxy.py | 21 +++++++++++++++------ test/test_encoding.py | 31 +++++++++++++++++++++++++++++++ test/test_utils.py | 2 +- 7 files changed, 122 insertions(+), 26 deletions(-) create mode 100644 libmproxy/encoding.py create mode 100644 test/test_encoding.py diff --git a/libmproxy/cmdline.py b/libmproxy/cmdline.py index e3e6ef4e5..2d78e5d1b 100644 --- a/libmproxy/cmdline.py +++ b/libmproxy/cmdline.py @@ -111,7 +111,7 @@ def common_options(parser): ) parser.add_option( "-z", - action="store_false", dest="anticomp", default=True, + action="store_false", dest="anticomp", default=False, help="Try to convince servers to send us un-compressed data." ) diff --git a/libmproxy/console.py b/libmproxy/console.py index 09149186e..c1e14b332 100644 --- a/libmproxy/console.py +++ b/libmproxy/console.py @@ -1,15 +1,15 @@ # Copyright (C) 2010 Aldo Cortesi -# +# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
@@ -18,7 +18,7 @@ import os.path, sys import cStringIO import urwid.raw_display import urwid -import controller, utils, filt, proxy, flow +import controller, utils, filt, proxy, flow, encoding VIEW_CUTOFF = 1024*100 @@ -77,7 +77,7 @@ def format_flow(f, focus, extended=False, padding=2): else: ts = " " - txt.append("\n") + txt.append("\n") txt.append(("text", ts)) txt.append(" "*(padding+2)) met = "" @@ -97,6 +97,11 @@ def format_flow(f, focus, extended=False, padding=2): if t: t = t[0].split(";")[0] txt.append(("text", " %s"%t)) + e = f.response.headers["content-encoding"] + if e: + e = e[0] + else: + e = "identity" if f.response.content: txt.append(", %s"%utils.pretty_size(len(f.response.content))) elif f.error: @@ -121,7 +126,7 @@ def int_version(v): for i in range(min(SIG, len(v))): x += int(v[i]) * 10**(SIG-i) return x - + # We have to do this to be portable over 0.9.8 and 0.9.9 If compatibility # becomes a pain to maintain, we'll just mandate 0.9.9 or newer. @@ -295,8 +300,13 @@ class ConnectionView(WWrap): def _conn_text(self, conn, viewmode): if conn: + e = conn.headers["content-encoding"] + if e: + e = e[0] + else: + e = "identity" return self.master._cached_conn_text( - conn.content, + encoding.decode(e, conn.content), tuple([tuple(i) for i in conn.headers.lst]), viewmode ) @@ -395,7 +405,7 @@ class ConnectionView(WWrap): response = self.flow.response response.msg = msg self.master.refresh_connection(self.flow) - + def edit(self, part): if self.state.view_flow_mode == VIEW_FLOW_REQUEST: conn = self.flow.request @@ -577,7 +587,7 @@ class PathEdit(urwid.Edit, _PathCompleter): else: self.reset() return urwid.Edit.keypress(self, size, key) - + class ActionBar(WWrap): def __init__(self): @@ -656,7 +666,7 @@ class StatusBar(WWrap): ('statusbar_text', ("[%s]"%len(self.master.state.flow_list)).ljust(7)), ] t.extend(self.get_status()) - + if self.master.server: boundaddr = "[%s:%s]"%(self.master.server.address or "*", self.master.server.port) else: @@ -821,9 +831,9 
@@ class ConsoleMaster(flow.FlowMaster): self.set_palette() if options.response_script: - self.set_response_script(options.response_script) + self.set_response_script(options.response_script) if options.request_script: - self.set_request_script(options.request_script) + self.set_request_script(options.request_script) r = self.set_limit(options.limit) if r: @@ -1157,7 +1167,7 @@ class ConsoleMaster(flow.FlowMaster): def _write_flows(self, path, flows): self.state.last_saveload = path if not path: - return + return path = os.path.expanduser(path) try: f = file(path, "wb") @@ -1176,7 +1186,7 @@ class ConsoleMaster(flow.FlowMaster): def load_flows(self, path): if not path: - return + return self.state.last_saveload = path path = os.path.expanduser(path) try: @@ -1307,7 +1317,7 @@ class ConsoleMaster(flow.FlowMaster): def prompt_onekey(self, prompt, keys, callback): """ Keys are a set of (word, key) tuples. The appropriate key in the - word is highlighted. + word is highlighted. """ prompt = [prompt, " ("] mkup = [] diff --git a/libmproxy/encoding.py b/libmproxy/encoding.py new file mode 100644 index 000000000..f280ed9fc --- /dev/null +++ b/libmproxy/encoding.py @@ -0,0 +1,43 @@ +""" + Utility functions for decoding response bodies. +""" +import cStringIO +import gzip, zlib + +__ALL__ = ["ENCODINGS"] + +ENCODINGS = set(["identity", "gzip", "deflate"]) + +def decode(encoding, content): + encoding_map = { + "identity": decode_identity, + "gzip": decode_gzip, + "deflate": decode_deflate, + } + + return encoding_map.get(encoding, decode_identity)(content) + +def decode_identity(content): + """ + Returns content unchanged. Identity is the default value of + Accept-Encoding headers. + """ + return content + +def decode_gzip(content): + gfile = gzip.GzipFile(fileobj=cStringIO.StringIO(content)) + return gfile.read() + +def decode_deflate(content): + """ + Returns decompressed data for DEFLATE. Some servers may respond with + compressed data without a zlib header or checksum. 
An undocumented + feature of zlib permits the lenient decompression of data missing both + values. + + http://bugs.python.org/issue5784 + """ + try: + return zlib.decompress(content) + except zlib.error: + return zlib.decompress(content, -15) \ No newline at end of file diff --git a/libmproxy/flow.py b/libmproxy/flow.py index bd07cfa7a..568ec3d14 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -84,10 +84,10 @@ class ServerPlaybackState: def count(self): return sum([len(i) for i in self.fmap.values()]) - + def _hash(self, flow): """ - Calculates a loose hash of the flow request. + Calculates a loose hash of the flow request. """ r = flow.request key = [ @@ -130,7 +130,7 @@ class StickyCookieState: def ckey(self, m, f): """ - Returns a (domain, port, path) tuple. + Returns a (domain, port, path) tuple. """ return ( m["domain"] or f.request.host, @@ -568,6 +568,9 @@ class FlowMaster(controller.Master): f.request.anticache() if self.anticomp: f.request.anticomp() + else: + f.request.constrain_encoding() + if self.server_playback: pb = self.do_server_playback(f) if not pb: diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py index fe545335a..a7cc31e86 100644 --- a/libmproxy/proxy.py +++ b/libmproxy/proxy.py @@ -1,7 +1,7 @@ """ A simple proxy server implementation, which always reads all of a server response into memory, performs some transformation, and then writes it back - to the client. + to the client. 
Development started from Neil Schemenauer's munchy.py """ @@ -9,7 +9,7 @@ import sys, os, string, socket, urlparse, re, select, copy, base64, time, Cookie from email.utils import parsedate_tz, formatdate, mktime_tz import shutil, tempfile import optparse, SocketServer, ssl -import utils, controller +import utils, controller, encoding NAME = "mitmproxy" @@ -53,7 +53,7 @@ def read_chunked(fp): if line == '\r\n' or line == '\n': break return content - + def read_http_body(rfile, connection, headers, all): if 'transfer-encoding' in headers: @@ -156,11 +156,21 @@ class Request(controller.Msg): def anticomp(self): """ - Modifies this request to remove headers that might produce a cached - response. That is, we remove ETags and If-Modified-Since headers. + Modifies this request to remove headers that will compress the + resource's data. """ self.headers["accept-encoding"] = ["identity"] + def constrain_encoding(self): + """ + Limits the permissible Accept-Encoding values, based on what we can + decode appropriately. + """ + if self.headers["accept-encoding"]: + self.headers["accept-encoding"] = [', '.join([ + e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0] + ])] + def set_replay(self): self.client_conn = None @@ -381,7 +391,6 @@ class Response(controller.Msg): modifications to make sure interception works properly. 
""" headers = self.headers.copy() - utils.try_del(headers, 'accept-encoding') utils.try_del(headers, 'proxy-connection') utils.try_del(headers, 'connection') utils.try_del(headers, 'keep-alive') diff --git a/test/test_encoding.py b/test/test_encoding.py new file mode 100644 index 000000000..ba0755d66 --- /dev/null +++ b/test/test_encoding.py @@ -0,0 +1,31 @@ +from libmproxy import encoding +import libpry + +import cStringIO +import gzip, zlib + +class udecode_identity(libpry.AutoTree): + def test_decode(self): + assert 'string' == encoding.decode('identity', 'string') + + def test_fallthrough(self): + assert 'string' == encoding.decode('nonexistent encoding', 'string') + +class udecode_gzip(libpry.AutoTree): + def test_simple(self): + s = cStringIO.StringIO() + gf = gzip.GzipFile(fileobj=s, mode='wb') + gf.write('string') + gf.close() + assert 'string' == encoding.decode('gzip', s.getvalue()) + +class udecode_deflate(libpry.AutoTree): + def test_simple(self): + assert 'string' == encoding.decode('deflate', zlib.compress('string')) + assert 'string' == encoding.decode('deflate', zlib.compress('string')[2:-4]) + +tests = [ + udecode_identity(), + udecode_gzip(), + udecode_deflate() +] diff --git a/test/test_utils.py b/test/test_utils.py index 2b0f43422..2ff951d47 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -98,7 +98,7 @@ class uHeaders(libpry.AutoTree): out = repr(self.hd) for i in expected: assert out.find(i) >= 0 - + def test_dictToHeader2(self): self.hd["one"] = ["uno"] expected1 = "one: uno\r\n"