From a1a792aeac20eab1fd2f2e1a91d34ba990fc111b Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 22 Oct 2016 18:47:12 -0700 Subject: [PATCH] various encoding fixes, fix #1650 --- examples/har_dump.py | 39 ++++++++++++--------------- pathod/language/generators.py | 2 +- test/mitmproxy/test_examples.py | 21 +++++++++++++++ test/mitmproxy/test_flow.py | 2 +- test/mitmproxy/test_utils_strutils.py | 2 +- 5 files changed, 41 insertions(+), 25 deletions(-) diff --git a/examples/har_dump.py b/examples/har_dump.py index 3ba08968b..efcf9d74b 100644 --- a/examples/har_dump.py +++ b/examples/har_dump.py @@ -3,7 +3,6 @@ This inline script can be used to dump flows as HAR files. """ -import pprint import json import sys import base64 @@ -128,19 +127,22 @@ def response(flow): "timings": timings, } - # Store binay data as base64 + # Store binary data as base64 if strutils.is_mostly_bin(flow.response.content): - b64 = base64.b64encode(flow.response.content) - entry["response"]["content"]["text"] = b64.decode('ascii') + entry["response"]["content"]["text"] = base64.b64encode(flow.response.content).decode() entry["response"]["content"]["encoding"] = "base64" else: - entry["response"]["content"]["text"] = flow.response.text + entry["response"]["content"]["text"] = flow.response.get_text(strict=False) if flow.request.method in ["POST", "PUT", "PATCH"]: + params = [ + {"name": a.decode("utf8", "surrogateescape"), "value": b.decode("utf8", "surrogateescape")} + for a, b in flow.request.urlencoded_form.items(multi=True) + ] entry["request"]["postData"] = { "mimeType": flow.request.headers.get("Content-Type", "").split(";")[0], - "text": _always_string(flow.request.content), - "params": name_value(flow.request.urlencoded_form) + "text": flow.request.get_text(strict=False), + "params": params } if flow.server_conn: @@ -155,16 +157,17 @@ def done(): """ dump_file = sys.argv[1] + json_dump = json.dumps(HAR, indent=2) # type: str + if dump_file == '-': - mitmproxy.ctx.log(pprint.pformat(HAR)) + mitmproxy.ctx.log(json_dump) else: - json_dump = json.dumps(HAR, indent=2) - + raw = json_dump.encode() # type: bytes if dump_file.endswith('.zhar'): - json_dump = zlib.compress(json_dump, 9) + raw = zlib.compress(raw, 9) - with open(dump_file, "w") as f: - f.write(json_dump) + with open(dump_file, "wb") as f: + f.write(raw) mitmproxy.ctx.log("HAR dump finished (wrote %s bytes to file)" % len(json_dump)) @@ -213,12 +216,4 @@ def name_value(obj): """ Convert (key, value) pairs to HAR format. """ - return [{"name": _always_string(k), "value": _always_string(v)} for k, v in obj.items()] - -def _always_string(byte_or_str): - """ - Makes sure we get text back instead of `bytes` since json.dumps dies on `bytes` - """ - if isinstance(byte_or_str, bytes): - return byte_or_str.decode('utf8') - return byte_or_str + return [{"name": k, "value": v} for k, v in obj.items()] diff --git a/pathod/language/generators.py b/pathod/language/generators.py index 4e19ecd91..d716804da 100644 --- a/pathod/language/generators.py +++ b/pathod/language/generators.py @@ -14,7 +14,7 @@ DATATYPES = dict( punctuation=string.punctuation.encode(), whitespace=string.whitespace.encode(), ascii=string.printable.encode(), - bytes=bytes(bytearray(range(256))) + bytes=bytes(range(256)) ) diff --git a/test/mitmproxy/test_examples.py b/test/mitmproxy/test_examples.py index 6972da0c7..3cd29a509 100644 --- a/test/mitmproxy/test_examples.py +++ b/test/mitmproxy/test_examples.py @@ -160,3 +160,24 @@ class TestHARDump: f = format_cookies([("n", "v", CA([("expires", "Mon, 24-Aug-2037 00:00:00 GMT")]))])[0] assert f['expires'] + + def test_binary(self): + + f = self.flow() + f.request.method = "POST" + f.request.headers["content-type"] = "application/x-www-form-urlencoded" + f.request.content = b"foo=bar&baz=s%c3%bc%c3%9f" + f.response.headers["random-junk"] = bytes(range(256)) + f.response.content = bytes(range(256)) + + with tutils.tmpdir() as tdir: + path = os.path.join(tdir, "somefile") + + m, sc = tscript("har_dump.py", shlex.quote(path)) + m.addons.invoke(m, "response", f) + m.addons.remove(sc) + + with open(path, "r") as inp: + har = json.load(inp) + + assert len(har["log"]["entries"]) == 1 diff --git a/test/mitmproxy/test_flow.py b/test/mitmproxy/test_flow.py index 5b9f38355..d16bb6dd0 100644 --- a/test/mitmproxy/test_flow.py +++ b/test/mitmproxy/test_flow.py @@ -346,7 +346,7 @@ class TestSerialize: sio = io.BytesIO() f = tutils.tflow() f.marked = True - f.request.content = bytes(bytearray(range(256))) + f.request.content = bytes(range(256)) w = mitmproxy.io.FlowWriter(sio) w.add(f) diff --git a/test/mitmproxy/test_utils_strutils.py b/test/mitmproxy/test_utils_strutils.py index d4c2883c0..84281c6b8 100644 --- a/test/mitmproxy/test_utils_strutils.py +++ b/test/mitmproxy/test_utils_strutils.py @@ -3,7 +3,7 @@ from mitmproxy.test import tutils def test_always_bytes(): - assert strutils.always_bytes(bytes(bytearray(range(256)))) == bytes(bytearray(range(256))) + assert strutils.always_bytes(bytes(range(256))) == bytes(range(256)) assert strutils.always_bytes("foo") == b"foo" with tutils.raises(ValueError): strutils.always_bytes(u"\u2605", "ascii")