diff --git a/docs/src/content/overview-features.md b/docs/src/content/overview-features.md index 73b990b4f..8842c453e 100644 --- a/docs/src/content/overview-features.md +++ b/docs/src/content/overview-features.md @@ -48,19 +48,19 @@ The `modify_body` option lets you specify an arbitrary number of patterns that define replacements within bodies of flows. A replacement pattern looks like this: {{< highlight none >}} -[/patt]/regex/replacement +[/flow-filter]/regex/replacement {{< / highlight >}} -Here, **patt** is a mitmproxy [filter expression]({{< relref "concepts-filters">}}) +Here, **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) that defines which flows a replacement applies to, **regex** is a valid Python regular expression that defines what gets replaced, and **replacement** is a string literal that is substituted in. The separator is arbitrary, and defined by the first character. If the replacement string literal starts with `@`, it is treated as a file path from which the replacement is read. -Replace hooks fire when either a client request or a server response is +Modify hooks fire when either a client request or a server response is received. Only the matching flow component is affected: so, for example, -if a replace hook is triggered on server response, the replacement is +if a modify hook is triggered on server response, the replacement is only run on the Response object leaving the Request intact. You control whether the hook triggers on the request, response or both using the filter pattern. If you need finer-grained control than this, it's simple @@ -88,16 +88,17 @@ New headers can be added, and existing headers can be overwritten or removed. 
A `modify_headers` expression looks like this: {{< highlight none >}} -[/patt]/name/value +[/flow-filter]/name/value {{< / highlight >}} -Here, **patt** is a mitmproxy [filter expression]({{< relref "concepts-filters">}}) +Here, **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) that defines which flows to modify headers on, e.g., only on responses using ``~s``. The parameters **name** and **value** are the header name and the value to set respectively, e.g., ``/Host/example.org``. An empty **value** removes existing -headers with **name**, e.g., ``/Host/``. Existing headers are overwritten by -default. This can be changed using filter-expressions, e.g., ``!~h Host:`` to -ignore requests and responses with an existing ``Host`` header. +headers with **name**, e.g., ``/Host/``. If **value** starts with `@`, it is treated +as a file path from which the header value is read. Existing headers are overwritten +by default. This can be changed using a filter expression, e.g., the filter +``!~h Host:`` ignores requests and responses with an existing ``Host`` header. ## Proxy Authentication diff --git a/mitmproxy/addons/modifybody.py b/mitmproxy/addons/modifybody.py index 77bc0c93b..4bc9edae8 100644 --- a/mitmproxy/addons/modifybody.py +++ b/mitmproxy/addons/modifybody.py @@ -3,14 +3,13 @@ import re import typing from mitmproxy import exceptions -from mitmproxy import flowfilter from mitmproxy import ctx -from mitmproxy.addons.modifyheaders import parse_modify_hook +from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec class ModifyBody: def __init__(self): - self.lst = [] + self.replacements: typing.List[ModifySpec] = [] def load(self, loader): loader.add_option( @@ -23,62 +22,42 @@ class ModifyBody: ) def configure(self, updated): - """ - .modify_body is a list of tuples (flow_filter_pattern, regex, repl): - - flow_filter_pattern: a string specifying a flow filter pattern. 
- regex: a regular expression, as string. - repl: the replacement string - """ if "modify_body" in updated: - lst = [] - for rep in ctx.options.modify_body: + self.replacements = [] + for option in ctx.options.modify_body: try: - flow_filter_pattern, regex, repl = parse_modify_hook(rep) + spec = parse_modify_spec(option) + try: + # We should ideally escape here before trying to compile + re.compile(spec.subject) + except re.error: + raise ValueError(f"Invalid regular expression: {spec.subject}") except ValueError as e: raise exceptions.OptionsError( - "Invalid modify_body option: %s" % rep + f"Cannot parse modify_body option {option}: {e}" ) from e - flow_filter = flowfilter.parse(flow_filter_pattern) - if not flow_filter: - raise exceptions.OptionsError( - "Invalid modify_body flow filter: %s" % flow_filter_pattern - ) - try: - # We should ideally escape here before trying to compile - re.compile(regex) - except re.error as e: - raise exceptions.OptionsError( - "Invalid regular expression: %s - %s" % (regex, str(e)) - ) - if repl.startswith(b"@") and not os.path.isfile(repl[1:]): - raise exceptions.OptionsError( - "Invalid file path: {}".format(repl[1:]) - ) - lst.append((regex, repl, flow_filter)) - self.lst = lst + self.replacements.append(spec) - def execute(self, f): - for regex, repl, flow_filter in self.lst: - if flow_filter(f): - if f.response: - self.replace(f.response, regex, repl) + def run(self, flow): + for spec in self.replacements: + if spec.matches(flow): + if flow.response: + self.replace(flow.response, spec.subject, spec.replacement) else: - self.replace(f.request, regex, repl) + self.replace(flow.request, spec.subject, spec.replacement) def request(self, flow): if not flow.reply.has_message: - self.execute(flow) + self.run(flow) def response(self, flow): if not flow.reply.has_message: - self.execute(flow) + self.run(flow) def replace(self, obj, search, repl): """ - Replaces a regular expression pattern with repl in the body of the message. 
- Encoded body will be decoded before replacement, and re-encoded afterwards. + Replaces all matches of the regex search in the body of the message with repl. Returns: The number of replacements made. diff --git a/mitmproxy/addons/modifyheaders.py b/mitmproxy/addons/modifyheaders.py index ca694feac..de5c90ecf 100644 --- a/mitmproxy/addons/modifyheaders.py +++ b/mitmproxy/addons/modifyheaders.py @@ -1,3 +1,4 @@ +import os import typing from mitmproxy import exceptions @@ -6,87 +7,116 @@ from mitmproxy.utils import strutils from mitmproxy import ctx -def parse_modify_hook(s): +class ModifySpec(typing.NamedTuple): """ - Returns a (flow_filter, header_name, header_value) tuple. + match_str: a string specifying a flow filter pattern. + matches: the parsed match_str as a flowfilter.TFilter object + subject: a header name for ModifyHeaders and a regex pattern for ModifyBody + replacement: the replacement string + """ + match_str: str + matches: flowfilter.TFilter + subject: bytes + replacement: bytes - The general form for a modify_headers hook is as follows: - [/flow_filter]/header_name/header_value +def parse_modify_spec(option) -> ModifySpec: + """ + The form for the modify_* options is as follows: + + * modify_headers: [/flow-filter]/header-name/[@]header-value + * modify_body: [/flow-filter]/search-regex/[@]replace + + The @ prefix marks the last parameter as a file path from which the respective value is read. + Both ModifyHeaders and ModifyBody use ModifySpec to represent a single rule. The first character specifies the separator. Example: :~q:foo:bar - If only two clauses are specified, the pattern is set to match - universally (i.e. ".*"). Example: + If only two clauses are specified, the flow filter is set to + match universally (i.e. ".*"). Example: /foo/bar Clauses are parsed from left to right. Extra separators are taken to be - part of the final clause. For instance, the replacement clause below is - "foo/bar/": + part of the final clause.
For instance, the last parameter (header-value or + replace) below is "foo/bar/": /one/two/foo/bar/ """ - sep, rem = s[0], s[1:] + sep, rem = option[0], option[1:] parts = rem.split(sep, 2) if len(parts) == 2: - flow_filter = ".*" - header_name, header_value = parts + flow_filter_pattern = ".*" + subject, replacement = parts elif len(parts) == 3: - flow_filter, header_name, header_value = parts + flow_filter_pattern, subject, replacement = parts else: - raise ValueError( - "Invalid modify_* specifier: %s" % s - ) + raise ValueError(f"Invalid number of parameters (2 or 3 are expected)") - if isinstance(header_name, str): - header_name = strutils.escaped_str_to_bytes(header_name) - if isinstance(header_value, str): - header_value = strutils.escaped_str_to_bytes(header_value) + flow_filter = flowfilter.parse(flow_filter_pattern) + if not flow_filter: + raise ValueError(f"Invalid filter pattern: {flow_filter_pattern}") - return flow_filter, header_name, header_value + if isinstance(subject, str): + subject = strutils.escaped_str_to_bytes(subject) + if isinstance(replacement, str): + replacement = strutils.escaped_str_to_bytes(replacement) + + if replacement.startswith(b"@") and not os.path.isfile(replacement[1:]): + raise ValueError(f"Invalid file path: {replacement[1:]}") + + return ModifySpec(flow_filter_pattern, flow_filter, subject, replacement) class ModifyHeaders: def __init__(self): - self.lst = [] + self.replacements: typing.List[ModifySpec] = [] def load(self, loader): loader.add_option( "modify_headers", typing.Sequence[str], [], """ - Header modify pattern of the form "[/flow-filter]/header-name/header-value", where the - separator can be any character. An empty header-value removes existing header-name headers. + Header modify pattern of the form "[/flow-filter]/header-name/[@]header-value", where the + separator can be any character. The @ allows to provide a file path that is used to read + the header value string. 
An empty header-value removes existing header-name headers. """ ) def configure(self, updated): + self.replacements = [] if "modify_headers" in updated: - self.lst = [] - for shead in ctx.options.modify_headers: + for option in ctx.options.modify_headers: try: - flow_filter_pattern, header, value = parse_modify_hook(shead) + spec = parse_modify_spec(option) except ValueError as e: raise exceptions.OptionsError( - "Invalid modify_headers option: %s" % shead + f"Cannot parse modify_headers option {option}: {e}" ) from e + self.replacements.append(spec) - flow_filter = flowfilter.parse(flow_filter_pattern) - if not flow_filter: - raise exceptions.OptionsError( - "Invalid modify_headers flow filter %s" % flow_filter_pattern - ) - self.lst.append((flow_filter_pattern, flow_filter, header, value)) + def run(self, flow, hdrs): + # unset all specified headers + for spec in self.replacements: + if spec.matches(flow): + hdrs.pop(spec.subject, None) - def run(self, f, hdrs): - for _, flow_filter, header, value in self.lst: - if flow_filter(f): - hdrs.pop(header, None) - for _, flow_filter, header, value in self.lst: - if flow_filter(f) and value: - hdrs.add(header, value) + # set all specified headers if the replacement string is not empty + for spec in self.replacements: + if spec.replacement.startswith(b"@"): + path = os.path.expanduser(spec.replacement[1:]) + try: + with open(path, "rb") as file: + replacement = file.read() + except IOError: + ctx.log.warn(f"Could not read replacement file {path}") + return + else: + replacement = spec.replacement + + if spec.matches(flow) and replacement: + hdrs.add(spec.subject, replacement) def request(self, flow): if not flow.reply.has_message: diff --git a/test/mitmproxy/addons/test_modifybody.py b/test/mitmproxy/addons/test_modifybody.py index 0e7f4c72c..6431e4f84 100644 --- a/test/mitmproxy/addons/test_modifybody.py +++ b/test/mitmproxy/addons/test_modifybody.py @@ -1,31 +1,37 @@ import pytest from mitmproxy.addons import modifybody 
-from mitmproxy.addons.modifyheaders import parse_modify_hook +from mitmproxy.addons.modifyheaders import parse_modify_spec from mitmproxy.test import taddons from mitmproxy.test import tflow class TestModifyBody: - def test_parse_modify_hook(self): - x = parse_modify_hook("/foo/bar/voing") - assert x == ("foo", b"bar", b"voing") - x = parse_modify_hook("/foo/bar/vo/ing/") - assert x == ("foo", b"bar", b"vo/ing/") - x = parse_modify_hook("/bar/voing") - assert x == (".*", b"bar", b"voing") - with pytest.raises(Exception, match="Invalid modify_\\* specifier"): - parse_modify_hook("/") + def test_parse_modify_spec(self): + x = parse_modify_spec("/foo/bar/voing") + assert [x[0], x[2], x[3]] == ["foo", b"bar", b"voing"] + + x = parse_modify_spec("/foo/bar/vo/ing/") + assert [x[0], x[2], x[3]] == ["foo", b"bar", b"vo/ing/"] + + x = parse_modify_spec("/bar/voing") + assert [x[0], x[2], x[3]] == [".*", b"bar", b"voing"] + + with pytest.raises(Exception, match="Invalid number of parameters"): + parse_modify_spec("/") + + with pytest.raises(Exception, match="Invalid filter pattern"): + parse_modify_spec("/~b/one/two") def test_configure(self): mb = modifybody.ModifyBody() with taddons.context(mb) as tctx: tctx.configure(mb, modify_body=["one/two/three"]) - with pytest.raises(Exception, match="Invalid modify_body option"): + with pytest.raises(Exception, match="Cannot parse modify_body .* Invalid number"): tctx.configure(mb, modify_body = ["/"]) - with pytest.raises(Exception, match="Invalid modify_body flow filter"): + with pytest.raises(Exception, match="Cannot parse modify_body .* Invalid filter"): tctx.configure(mb, modify_body=["/~b/two/three"]) - with pytest.raises(Exception, match="Invalid regular expression"): + with pytest.raises(Exception, match="Cannot parse modify_body .* Invalid regular expression"): tctx.configure(mb, modify_body=["/foo/+/three"]) tctx.configure(mb, modify_body=["/a/b/c/"]) diff --git a/test/mitmproxy/addons/test_modifyheaders.py 
b/test/mitmproxy/addons/test_modifyheaders.py index 88bef624c..bf75d4315 100644 --- a/test/mitmproxy/addons/test_modifyheaders.py +++ b/test/mitmproxy/addons/test_modifyheaders.py @@ -7,30 +7,44 @@ from mitmproxy.addons import modifyheaders class TestModifyHeaders: - def test_parse_modify_hook(self): - x = modifyheaders.parse_modify_hook("/foo/bar/voing") - assert x == ("foo", b"bar", b"voing") - x = modifyheaders.parse_modify_hook("/foo/bar/vo/ing/") - assert x == ("foo", b"bar", b"vo/ing/") - x = modifyheaders.parse_modify_hook("/bar/voing") - assert x == (".*", b"bar", b"voing") - with pytest.raises(Exception, match="Invalid modify_\\* specifier"): - modifyheaders.parse_modify_hook("/") + def test_parse_modify_spec(self): + x = modifyheaders.parse_modify_spec("/foo/bar/voing") + assert [x[0], x[2], x[3]] == ["foo", b"bar", b"voing"] + + x = modifyheaders.parse_modify_spec("/foo/bar/vo/ing/") + assert [x[0], x[2], x[3]] == ["foo", b"bar", b"vo/ing/"] + + x = modifyheaders.parse_modify_spec("/bar/voing") + assert [x[0], x[2], x[3]] == [".*", b"bar", b"voing"] + + with pytest.raises(Exception, match="Invalid number of parameters"): + modifyheaders.parse_modify_spec("/") + + with pytest.raises(Exception, match="Invalid filter pattern"): + modifyheaders.parse_modify_spec("/~b/one/two") + + with pytest.raises(Exception, match="Invalid file path"): + modifyheaders.parse_modify_spec("/~q/foo/@nonexistent") def test_configure(self): - sh = modifyheaders.ModifyHeaders() - with taddons.context(sh) as tctx: - with pytest.raises(Exception, match="Invalid modify_headers option"): - tctx.configure(sh, modify_headers = ["/"]) - with pytest.raises(Exception, match="Invalid modify_headers flow filter"): - tctx.configure(sh, modify_headers = ["/~b/one/two"]) - tctx.configure(sh, modify_headers = ["/foo/bar/voing"]) + mh = modifyheaders.ModifyHeaders() + with taddons.context(mh) as tctx: + with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid number"): + 
tctx.configure(mh, modify_headers = ["/"]) + + with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid filter"): + tctx.configure(mh, modify_headers = ["/~b/one/two"]) + + with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid file"): + tctx.configure(mh, modify_headers = ["/~q/foo/@nonexistent"]) + + tctx.configure(mh, modify_headers = ["/foo/bar/voing"]) def test_modify_headers(self): - sh = modifyheaders.ModifyHeaders() - with taddons.context(sh) as tctx: + mh = modifyheaders.ModifyHeaders() + with taddons.context(mh) as tctx: tctx.configure( - sh, + mh, modify_headers = [ "/~q/one/two", "/~s/one/three" @@ -38,16 +52,16 @@ class TestModifyHeaders: ) f = tflow.tflow() f.request.headers["one"] = "xxx" - sh.request(f) + mh.request(f) assert f.request.headers["one"] == "two" f = tflow.tflow(resp=True) f.response.headers["one"] = "xxx" - sh.response(f) + mh.response(f) assert f.response.headers["one"] == "three" tctx.configure( - sh, + mh, modify_headers = [ "/~s/one/two", "/~s/one/three" @@ -56,11 +70,11 @@ class TestModifyHeaders: f = tflow.tflow(resp=True) f.request.headers["one"] = "xxx" f.response.headers["one"] = "xxx" - sh.response(f) + mh.response(f) assert f.response.headers.get_all("one") == ["two", "three"] tctx.configure( - sh, + mh, modify_headers = [ "/~q/one/two", "/~q/one/three" @@ -68,12 +82,12 @@ class TestModifyHeaders: ) f = tflow.tflow() f.request.headers["one"] = "xxx" - sh.request(f) + mh.request(f) assert f.request.headers.get_all("one") == ["two", "three"] # test removal of existing headers tctx.configure( - sh, + mh, modify_headers = [ "/~q/one/", "/~s/one/" @@ -81,26 +95,64 @@ class TestModifyHeaders: ) f = tflow.tflow() f.request.headers["one"] = "xxx" - sh.request(f) + mh.request(f) assert "one" not in f.request.headers f = tflow.tflow(resp=True) f.response.headers["one"] = "xxx" - sh.response(f) + mh.response(f) assert "one" not in f.response.headers tctx.configure( - sh, + mh, modify_headers = [ 
"/one/" ] ) f = tflow.tflow() f.request.headers["one"] = "xxx" - sh.request(f) + mh.request(f) assert "one" not in f.request.headers f = tflow.tflow(resp=True) f.response.headers["one"] = "xxx" - sh.response(f) - assert "one" not in f.response.headers \ No newline at end of file + mh.response(f) + assert "one" not in f.response.headers + + +class TestModifyHeadersFile: + def test_simple(self, tmpdir): + mh = modifyheaders.ModifyHeaders() + with taddons.context(mh) as tctx: + tmpfile = tmpdir.join("replacement") + tmpfile.write("two") + tctx.configure( + mh, + modify_headers=["/~q/one/@" + str(tmpfile)] + ) + f = tflow.tflow() + f.request.headers["one"] = "xxx" + mh.request(f) + assert f.request.headers["one"] == "two" + + @pytest.mark.asyncio + async def test_nonexistent(self, tmpdir): + mh = modifyheaders.ModifyHeaders() + with taddons.context(mh) as tctx: + with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid file path"): + tctx.configure( + mh, + modify_headers=["/~q/foo/@nonexistent"] + ) + + tmpfile = tmpdir.join("replacement") + tmpfile.write("bar") + tctx.configure( + mh, + modify_headers=["/~q/foo/@" + str(tmpfile)] + ) + tmpfile.remove() + f = tflow.tflow() + f.request.content = b"foo" + mh.request(f) + assert await tctx.master.await_log("could not read")