refactor modify addons

Use a universal ModifySpec class to represent rules.
ModifyHeaders now supports reading the header value from a file.
This commit is contained in:
Martin Plattner 2020-07-01 13:25:28 +02:00
parent 2ed7770716
commit 2a408c9379
5 changed files with 204 additions and 136 deletions

View File

@ -48,19 +48,19 @@ The `modify_body` option lets you specify an arbitrary number of patterns that
define replacements within bodies of flows. A replacement pattern looks like this:
{{< highlight none >}}
[/patt]/regex/replacement
[/flow-filter]/regex/replacement
{{< / highlight >}}
Here, **patt** is a mitmproxy [filter expression]({{< relref "concepts-filters">}})
Here, **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}})
that defines which flows a replacement applies to, **regex** is a valid Python
regular expression that defines what gets replaced, and **replacement** is a string
literal that is substituted in. The separator is arbitrary, and defined by the
first character. If the replacement string literal starts with `@`, it is treated
as a file path from which the replacement is read.
Replace hooks fire when either a client request or a server response is
Modify hooks fire when either a client request or a server response is
received. Only the matching flow component is affected: so, for example,
if a replace hook is triggered on server response, the replacement is
if a modify hook is triggered on server response, the replacement is
only run on the Response object leaving the Request intact. You control
whether the hook triggers on the request, response or both using the
filter pattern. If you need finer-grained control than this, it's simple
@ -88,16 +88,17 @@ New headers can be added, and existing headers can be overwritten or removed.
A `modify_headers` expression looks like this:
{{< highlight none >}}
[/patt]/name/value
[/flow-filter]/name/value
{{< / highlight >}}
Here, **patt** is a mitmproxy [filter expression]({{< relref "concepts-filters">}})
Here, **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}})
that defines which flows to modify headers on, e.g., only on responses using ``~s``.
The parameters **name** and **value** are the header name and the value to set
respectively, e.g., ``/Host/example.org``. An empty **value** removes existing
headers with **name**, e.g., ``/Host/``. Existing headers are overwritten by
default. This can be changed using filter-expressions, e.g., ``!~h Host:`` to
ignore requests and responses with an existing ``Host`` header.
headers with **name**, e.g., ``/Host/``. If **value** starts with `@`, it is treated
as a file path from which the header value is read. Existing headers are overwritten
by default. This can be changed using a filter expression, e.g., the filter
``!~h Host:`` ignores requests and responses with an existing ``Host`` header.
## Proxy Authentication

View File

@ -3,14 +3,13 @@ import re
import typing
from mitmproxy import exceptions
from mitmproxy import flowfilter
from mitmproxy import ctx
from mitmproxy.addons.modifyheaders import parse_modify_hook
from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec
class ModifyBody:
def __init__(self):
self.lst = []
self.replacements: typing.List[ModifySpec] = []
def load(self, loader):
loader.add_option(
@ -23,62 +22,42 @@ class ModifyBody:
)
def configure(self, updated):
"""
.modify_body is a list of tuples (flow_filter_pattern, regex, repl):
flow_filter_pattern: a string specifying a flow filter pattern.
regex: a regular expression, as string.
repl: the replacement string
"""
if "modify_body" in updated:
lst = []
for rep in ctx.options.modify_body:
self.replacements = []
for option in ctx.options.modify_body:
try:
flow_filter_pattern, regex, repl = parse_modify_hook(rep)
except ValueError as e:
raise exceptions.OptionsError(
"Invalid modify_body option: %s" % rep
) from e
flow_filter = flowfilter.parse(flow_filter_pattern)
if not flow_filter:
raise exceptions.OptionsError(
"Invalid modify_body flow filter: %s" % flow_filter_pattern
)
spec = parse_modify_spec(option)
try:
# We should ideally escape here before trying to compile
re.compile(regex)
except re.error as e:
re.compile(spec.subject)
except re.error:
raise ValueError(f"Invalid regular expression: {spec.subject}")
except ValueError as e:
raise exceptions.OptionsError(
"Invalid regular expression: %s - %s" % (regex, str(e))
)
if repl.startswith(b"@") and not os.path.isfile(repl[1:]):
raise exceptions.OptionsError(
"Invalid file path: {}".format(repl[1:])
)
lst.append((regex, repl, flow_filter))
self.lst = lst
f"Cannot parse modify_body option {option}: {e}"
) from e
def execute(self, f):
for regex, repl, flow_filter in self.lst:
if flow_filter(f):
if f.response:
self.replace(f.response, regex, repl)
self.replacements.append(spec)
def run(self, flow):
for spec in self.replacements:
if spec.matches(flow):
if flow.response:
self.replace(flow.response, spec.subject, spec.replacement)
else:
self.replace(f.request, regex, repl)
self.replace(flow.request, spec.subject, spec.replacement)
def request(self, flow):
if not flow.reply.has_message:
self.execute(flow)
self.run(flow)
def response(self, flow):
if not flow.reply.has_message:
self.execute(flow)
self.run(flow)
def replace(self, obj, search, repl):
"""
Replaces a regular expression pattern with repl in the body of the message.
Encoded body will be decoded before replacement, and re-encoded afterwards.
Replaces all matches of the regex search in the body of the message with repl.
Returns:
The number of replacements made.

View File

@ -1,3 +1,4 @@
import os
import typing
from mitmproxy import exceptions
@ -6,87 +7,116 @@ from mitmproxy.utils import strutils
from mitmproxy import ctx
def parse_modify_hook(s):
class ModifySpec(typing.NamedTuple):
"""
Returns a (flow_filter, header_name, header_value) tuple.
match_str: a string specifying a flow filter pattern.
matches: the parsed match_str as a flowfilter.TFilter object
subject: a header name for ModifyHeaders and a regex pattern for ModifyBody
replacement: the replacement string
"""
match_str: str
matches: flowfilter.TFilter
subject: bytes
replacement: bytes
The general form for a modify_headers hook is as follows:
[/flow_filter]/header_name/header_value
def parse_modify_spec(option) -> ModifySpec:
"""
The form for the modify_* options is as follows:
* modify_headers: [/flow-filter]/header-name/[@]header-value
* modify_body: [/flow-filter]/search-regex/[@]replace
A leading @ marks the last parameter as a file path from which the header value or replacement is read.
Both ModifyHeaders and ModifyBody use ModifySpec to represent a single rule.
The first character specifies the separator. Example:
:~q:foo:bar
If only two clauses are specified, the pattern is set to match
universally (i.e. ".*"). Example:
If only two clauses are specified, the flow filter is set to
match universally (i.e. ".*"). Example:
/foo/bar
Clauses are parsed from left to right. Extra separators are taken to be
part of the final clause. For instance, the replacement clause below is
"foo/bar/":
part of the final clause. For instance, the last parameter (header-value or
replace) below is "foo/bar/":
/one/two/foo/bar/
"""
sep, rem = s[0], s[1:]
sep, rem = option[0], option[1:]
parts = rem.split(sep, 2)
if len(parts) == 2:
flow_filter = ".*"
header_name, header_value = parts
flow_filter_pattern = ".*"
subject, replacement = parts
elif len(parts) == 3:
flow_filter, header_name, header_value = parts
flow_filter_pattern, subject, replacement = parts
else:
raise ValueError(
"Invalid modify_* specifier: %s" % s
)
raise ValueError(f"Invalid number of parameters (2 or 3 are expected)")
if isinstance(header_name, str):
header_name = strutils.escaped_str_to_bytes(header_name)
if isinstance(header_value, str):
header_value = strutils.escaped_str_to_bytes(header_value)
flow_filter = flowfilter.parse(flow_filter_pattern)
if not flow_filter:
raise ValueError(f"Invalid filter pattern: {flow_filter_pattern}")
return flow_filter, header_name, header_value
if isinstance(subject, str):
subject = strutils.escaped_str_to_bytes(subject)
if isinstance(replacement, str):
replacement = strutils.escaped_str_to_bytes(replacement)
if replacement.startswith(b"@") and not os.path.isfile(replacement[1:]):
raise ValueError(f"Invalid file path: {replacement[1:]}")
return ModifySpec(flow_filter_pattern, flow_filter, subject, replacement)
class ModifyHeaders:
def __init__(self):
self.lst = []
self.replacements: typing.List[ModifySpec] = []
def load(self, loader):
loader.add_option(
"modify_headers", typing.Sequence[str], [],
"""
Header modify pattern of the form "[/flow-filter]/header-name/header-value", where the
separator can be any character. An empty header-value removes existing header-name headers.
Header modify pattern of the form "[/flow-filter]/header-name/[@]header-value", where the
separator can be any character. The @ allows to provide a file path that is used to read
the header value string. An empty header-value removes existing header-name headers.
"""
)
def configure(self, updated):
self.replacements = []
if "modify_headers" in updated:
self.lst = []
for shead in ctx.options.modify_headers:
for option in ctx.options.modify_headers:
try:
flow_filter_pattern, header, value = parse_modify_hook(shead)
spec = parse_modify_spec(option)
except ValueError as e:
raise exceptions.OptionsError(
"Invalid modify_headers option: %s" % shead
f"Cannot parse modify_headers option {option}: {e}"
) from e
self.replacements.append(spec)
flow_filter = flowfilter.parse(flow_filter_pattern)
if not flow_filter:
raise exceptions.OptionsError(
"Invalid modify_headers flow filter %s" % flow_filter_pattern
)
self.lst.append((flow_filter_pattern, flow_filter, header, value))
def run(self, flow, hdrs):
# unset all specified headers
for spec in self.replacements:
if spec.matches(flow):
hdrs.pop(spec.subject, None)
def run(self, f, hdrs):
for _, flow_filter, header, value in self.lst:
if flow_filter(f):
hdrs.pop(header, None)
for _, flow_filter, header, value in self.lst:
if flow_filter(f) and value:
hdrs.add(header, value)
# set all specified headers if the replacement string is not empty
for spec in self.replacements:
if spec.replacement.startswith(b"@"):
path = os.path.expanduser(spec.replacement[1:])
try:
with open(path, "rb") as file:
replacement = file.read()
except IOError:
ctx.log.warn(f"Could not read replacement file {path}")
return
else:
replacement = spec.replacement
if spec.matches(flow) and replacement:
hdrs.add(spec.subject, replacement)
def request(self, flow):
if not flow.reply.has_message:

View File

@ -1,31 +1,37 @@
import pytest
from mitmproxy.addons import modifybody
from mitmproxy.addons.modifyheaders import parse_modify_hook
from mitmproxy.addons.modifyheaders import parse_modify_spec
from mitmproxy.test import taddons
from mitmproxy.test import tflow
class TestModifyBody:
def test_parse_modify_hook(self):
x = parse_modify_hook("/foo/bar/voing")
assert x == ("foo", b"bar", b"voing")
x = parse_modify_hook("/foo/bar/vo/ing/")
assert x == ("foo", b"bar", b"vo/ing/")
x = parse_modify_hook("/bar/voing")
assert x == (".*", b"bar", b"voing")
with pytest.raises(Exception, match="Invalid modify_\\* specifier"):
parse_modify_hook("/")
def test_parse_modify_spec(self):
x = parse_modify_spec("/foo/bar/voing")
assert [x[0], x[2], x[3]] == ["foo", b"bar", b"voing"]
x = parse_modify_spec("/foo/bar/vo/ing/")
assert [x[0], x[2], x[3]] == ["foo", b"bar", b"vo/ing/"]
x = parse_modify_spec("/bar/voing")
assert [x[0], x[2], x[3]] == [".*", b"bar", b"voing"]
with pytest.raises(Exception, match="Invalid number of parameters"):
parse_modify_spec("/")
with pytest.raises(Exception, match="Invalid filter pattern"):
parse_modify_spec("/~b/one/two")
def test_configure(self):
mb = modifybody.ModifyBody()
with taddons.context(mb) as tctx:
tctx.configure(mb, modify_body=["one/two/three"])
with pytest.raises(Exception, match="Invalid modify_body option"):
with pytest.raises(Exception, match="Cannot parse modify_body .* Invalid number"):
tctx.configure(mb, modify_body = ["/"])
with pytest.raises(Exception, match="Invalid modify_body flow filter"):
with pytest.raises(Exception, match="Cannot parse modify_body .* Invalid filter"):
tctx.configure(mb, modify_body=["/~b/two/three"])
with pytest.raises(Exception, match="Invalid regular expression"):
with pytest.raises(Exception, match="Cannot parse modify_body .* Invalid regular expression"):
tctx.configure(mb, modify_body=["/foo/+/three"])
tctx.configure(mb, modify_body=["/a/b/c/"])

View File

@ -7,30 +7,44 @@ from mitmproxy.addons import modifyheaders
class TestModifyHeaders:
def test_parse_modify_hook(self):
x = modifyheaders.parse_modify_hook("/foo/bar/voing")
assert x == ("foo", b"bar", b"voing")
x = modifyheaders.parse_modify_hook("/foo/bar/vo/ing/")
assert x == ("foo", b"bar", b"vo/ing/")
x = modifyheaders.parse_modify_hook("/bar/voing")
assert x == (".*", b"bar", b"voing")
with pytest.raises(Exception, match="Invalid modify_\\* specifier"):
modifyheaders.parse_modify_hook("/")
def test_parse_modify_spec(self):
x = modifyheaders.parse_modify_spec("/foo/bar/voing")
assert [x[0], x[2], x[3]] == ["foo", b"bar", b"voing"]
x = modifyheaders.parse_modify_spec("/foo/bar/vo/ing/")
assert [x[0], x[2], x[3]] == ["foo", b"bar", b"vo/ing/"]
x = modifyheaders.parse_modify_spec("/bar/voing")
assert [x[0], x[2], x[3]] == [".*", b"bar", b"voing"]
with pytest.raises(Exception, match="Invalid number of parameters"):
modifyheaders.parse_modify_spec("/")
with pytest.raises(Exception, match="Invalid filter pattern"):
modifyheaders.parse_modify_spec("/~b/one/two")
with pytest.raises(Exception, match="Invalid file path"):
modifyheaders.parse_modify_spec("/~q/foo/@nonexistent")
def test_configure(self):
sh = modifyheaders.ModifyHeaders()
with taddons.context(sh) as tctx:
with pytest.raises(Exception, match="Invalid modify_headers option"):
tctx.configure(sh, modify_headers = ["/"])
with pytest.raises(Exception, match="Invalid modify_headers flow filter"):
tctx.configure(sh, modify_headers = ["/~b/one/two"])
tctx.configure(sh, modify_headers = ["/foo/bar/voing"])
mh = modifyheaders.ModifyHeaders()
with taddons.context(mh) as tctx:
with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid number"):
tctx.configure(mh, modify_headers = ["/"])
with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid filter"):
tctx.configure(mh, modify_headers = ["/~b/one/two"])
with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid file"):
tctx.configure(mh, modify_headers = ["/~q/foo/@nonexistent"])
tctx.configure(mh, modify_headers = ["/foo/bar/voing"])
def test_modify_headers(self):
sh = modifyheaders.ModifyHeaders()
with taddons.context(sh) as tctx:
mh = modifyheaders.ModifyHeaders()
with taddons.context(mh) as tctx:
tctx.configure(
sh,
mh,
modify_headers = [
"/~q/one/two",
"/~s/one/three"
@ -38,16 +52,16 @@ class TestModifyHeaders:
)
f = tflow.tflow()
f.request.headers["one"] = "xxx"
sh.request(f)
mh.request(f)
assert f.request.headers["one"] == "two"
f = tflow.tflow(resp=True)
f.response.headers["one"] = "xxx"
sh.response(f)
mh.response(f)
assert f.response.headers["one"] == "three"
tctx.configure(
sh,
mh,
modify_headers = [
"/~s/one/two",
"/~s/one/three"
@ -56,11 +70,11 @@ class TestModifyHeaders:
f = tflow.tflow(resp=True)
f.request.headers["one"] = "xxx"
f.response.headers["one"] = "xxx"
sh.response(f)
mh.response(f)
assert f.response.headers.get_all("one") == ["two", "three"]
tctx.configure(
sh,
mh,
modify_headers = [
"/~q/one/two",
"/~q/one/three"
@ -68,12 +82,12 @@ class TestModifyHeaders:
)
f = tflow.tflow()
f.request.headers["one"] = "xxx"
sh.request(f)
mh.request(f)
assert f.request.headers.get_all("one") == ["two", "three"]
# test removal of existing headers
tctx.configure(
sh,
mh,
modify_headers = [
"/~q/one/",
"/~s/one/"
@ -81,26 +95,64 @@ class TestModifyHeaders:
)
f = tflow.tflow()
f.request.headers["one"] = "xxx"
sh.request(f)
mh.request(f)
assert "one" not in f.request.headers
f = tflow.tflow(resp=True)
f.response.headers["one"] = "xxx"
sh.response(f)
mh.response(f)
assert "one" not in f.response.headers
tctx.configure(
sh,
mh,
modify_headers = [
"/one/"
]
)
f = tflow.tflow()
f.request.headers["one"] = "xxx"
sh.request(f)
mh.request(f)
assert "one" not in f.request.headers
f = tflow.tflow(resp=True)
f.response.headers["one"] = "xxx"
sh.response(f)
mh.response(f)
assert "one" not in f.response.headers
class TestModifyHeadersFile:
def test_simple(self, tmpdir):
mh = modifyheaders.ModifyHeaders()
with taddons.context(mh) as tctx:
tmpfile = tmpdir.join("replacement")
tmpfile.write("two")
tctx.configure(
mh,
modify_headers=["/~q/one/@" + str(tmpfile)]
)
f = tflow.tflow()
f.request.headers["one"] = "xxx"
mh.request(f)
assert f.request.headers["one"] == "two"
@pytest.mark.asyncio
async def test_nonexistent(self, tmpdir):
mh = modifyheaders.ModifyHeaders()
with taddons.context(mh) as tctx:
with pytest.raises(Exception, match="Cannot parse modify_headers .* Invalid file path"):
tctx.configure(
mh,
modify_headers=["/~q/foo/@nonexistent"]
)
tmpfile = tmpdir.join("replacement")
tmpfile.write("bar")
tctx.configure(
mh,
modify_headers=["/~q/foo/@" + str(tmpfile)]
)
tmpfile.remove()
f = tflow.tflow()
f.request.content = b"foo"
mh.request(f)
assert await tctx.master.await_log("could not read")