From 7b4e219c4e51c0646e0f1543c5e470a09a3decb9 Mon Sep 17 00:00:00 2001 From: Matthew Hughes <34972397+matthewhughes934@users.noreply.github.com> Date: Thu, 23 Sep 2021 14:55:43 +0100 Subject: [PATCH] mitmweb: handle {en,de}coding on server-side (#4811) * mitmweb: handle {en,de}coding on server-side Handle this server-side rather than passing the message's content-encoding details back to the client when fetching flow content. If {en,de}coding fails, return the raw message contents. This addresses https://github.com/mitmproxy/mitmproxy/issues/4809 * fix typo Co-authored-by: Maximilian Hils --- mitmproxy/tools/web/app.py | 7 +------ test/mitmproxy/tools/web/test_app.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/mitmproxy/tools/web/app.py b/mitmproxy/tools/web/app.py index 4034314ed..9deba965e 100644 --- a/mitmproxy/tools/web/app.py +++ b/mitmproxy/tools/web/app.py @@ -439,11 +439,6 @@ class FlowContent(RequestHandler): def get(self, flow_id, message): message = getattr(self.flow, message) - content_encoding = message.headers.get("Content-Encoding", None) - if content_encoding: - content_encoding = re.sub(r"[^\w]", "", content_encoding) - self.set_header("Content-Encoding", content_encoding) - original_cd = message.headers.get("Content-Disposition", None) filename = None if original_cd: @@ -459,7 +454,7 @@ class FlowContent(RequestHandler): self.set_header("Content-Type", "application/text") self.set_header("X-Content-Type-Options", "nosniff") self.set_header("X-Frame-Options", "DENY") - self.write(message.raw_content) + self.write(message.get_content(strict=False)) class FlowContentView(RequestHandler): diff --git a/test/mitmproxy/tools/web/test_app.py b/test/mitmproxy/tools/web/test_app.py index 47f23d9c7..99cbb61bd 100644 --- a/test/mitmproxy/tools/web/test_app.py +++ b/test/mitmproxy/tools/web/test_app.py @@ -1,5 +1,6 @@ import asyncio import io +import gzip import json import logging import textwrap @@ -290,12 +291,10 @@ class 
TestApp(tornado.testing.AsyncHTTPTestCase): def test_flow_content(self): f = self.view.get_by_id("42") f.backup() - f.response.headers["Content-Encoding"] = "ran\x00dom" f.response.headers["Content-Disposition"] = 'inline; filename="filename.jpg"' r = self.fetch("/flows/42/response/content.data") assert r.body == b"message" - assert r.headers["Content-Encoding"] == "random" assert r.headers["Content-Disposition"] == 'attachment; filename="filename.jpg"' del f.response.headers["Content-Disposition"] @@ -311,6 +310,21 @@ class TestApp(tornado.testing.AsyncHTTPTestCase): f.revert() + def test_flow_content_returns_raw_content_when_decoding_fails(self): + f = self.view.get_by_id("42") + f.backup() + + f.response.headers["Content-Encoding"] = "gzip" + # replace gzip magic number with garbage + invalid_encoded_content = gzip.compress(b"Hello world!").replace(b"\x1f\x8b", b"\xff\xff") + f.response.raw_content = invalid_encoded_content + + r = self.fetch("/flows/42/response/content.data") + assert r.body == invalid_encoded_content + assert r.code == 200 + + f.revert() + def test_update_flow_content(self): assert self.fetch( "/flows/42/request/content.data",