From 1b6b3cd96cf2966b888d9259e4e55935dca6e763 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Tue, 7 Jul 2020 16:12:23 +0200 Subject: [PATCH 01/22] add MapLocal addon draft --- mitmproxy/addons/__init__.py | 2 + mitmproxy/addons/maplocal.py | 104 ++++++++++++++++++++ mitmproxy/addons/mapremote.py | 2 +- mitmproxy/addons/modifybody.py | 2 +- mitmproxy/addons/modifyheaders.py | 20 ++-- mitmproxy/tools/cmdline.py | 4 + test/mitmproxy/addons/test_maplocal.py | 34 +++++++ test/mitmproxy/addons/test_modifyheaders.py | 14 +-- 8 files changed, 167 insertions(+), 15 deletions(-) create mode 100644 mitmproxy/addons/maplocal.py create mode 100644 test/mitmproxy/addons/test_maplocal.py diff --git a/mitmproxy/addons/__init__.py b/mitmproxy/addons/__init__.py index f3d964522..85135ee36 100644 --- a/mitmproxy/addons/__init__.py +++ b/mitmproxy/addons/__init__.py @@ -14,6 +14,7 @@ from mitmproxy.addons import proxyauth from mitmproxy.addons import script from mitmproxy.addons import serverplayback from mitmproxy.addons import mapremote +from mitmproxy.addons import maplocal from mitmproxy.addons import modifybody from mitmproxy.addons import modifyheaders from mitmproxy.addons import stickyauth @@ -41,6 +42,7 @@ def default_addons(): script.ScriptLoader(), serverplayback.ServerPlayback(), mapremote.MapRemote(), + maplocal.MapLocal(), modifybody.ModifyBody(), modifyheaders.ModifyHeaders(), stickyauth.StickyAuth(), diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py new file mode 100644 index 000000000..b919621a3 --- /dev/null +++ b/mitmproxy/addons/maplocal.py @@ -0,0 +1,104 @@ +import re +import typing +import urllib +from pathlib import Path + +from mitmproxy import exceptions +from mitmproxy import ctx +from mitmproxy import http +from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec + + +class MapLocal: + def __init__(self): + self.replacements: typing.List[ModifySpec] = [] + + def load(self, loader): + loader.add_option( + 
"map_local", typing.Sequence[str], [], + """ + Replacement pattern of the form "[/flow-filter]/regex/file-or-directory", where + the separator can be any character. The @ allows to provide a file path that + is used to read the replacement string. + """ + ) + + def configure(self, updated): + if "map_local" in updated: + self.replacements = [] + for option in ctx.options.map_local: + try: + spec = parse_modify_spec(option, True, True) + except ValueError as e: + raise exceptions.OptionsError(f"Cannot parse map_local option {option}: {e}") from e + + self.replacements.append(spec) + + def construct_candidate_path(self, base_path, path_components, filename): + candidate_path = base_path.joinpath("/".join(path_components + [filename])) + return str(candidate_path) + + def sanitize_candidate_path(self, candidate_path, base_path): + try: + candidate_path.resolve(strict=True) + if base_path in candidate_path.parents: + return candidate_path + except FileNotFoundError: + pass + return None + + def file_candidates(self, url: str, spec: ModifySpec) -> typing.List[Path]: + replacement = spec.replacement_str + candidates = [] + + if replacement.is_file(): + candidates.append(replacement) + + elif replacement.is_dir(): + parsed_url = urllib.parse.urlparse(url) + + path_components = parsed_url.path.lstrip("/").split("/") + filename = path_components.pop() + + # todo: this can be improved (e.g., also consider index.htm) + if not filename: + filename = 'index.html' + + # construct all possible paths + while True: + candidates.append( + self.construct_candidate_path(replacement, path_components, filename) + ) + + if not path_components: + break + + path_components.pop() + + return candidates + + def request(self, flow: http.HTTPFlow) -> None: + if flow.reply and flow.reply.has_message: + return + for spec in self.replacements: + req = flow.request + url = req.pretty_url + base_path = Path(spec.replacement_str) + if spec.matches(flow) and re.search(spec.subject, url.encode("utf8", 
"surrogateescape")): + file_candidates = self.file_candidates(url, spec) + for file_candidate in file_candidates: + file_candidate = Path(file_candidate) + if self.sanitize_candidate_path(file_candidate, base_path): + try: + with open(file_candidate, "rb") as file: + replacement = file.read() + except IOError: + ctx.log.warn(f"Could not read replacement file {file_candidate}") + return + + flow.response = http.HTTPResponse.make( + 200, # (optional) status code + replacement, # (optional) content + # todo: guess mime type + {"Content-Type": "image/jpeg"} # (optional) headers + ) diff --git a/mitmproxy/addons/mapremote.py b/mitmproxy/addons/mapremote.py index 03f303da4..fb8503a7d 100644 --- a/mitmproxy/addons/mapremote.py +++ b/mitmproxy/addons/mapremote.py @@ -25,7 +25,7 @@ class MapRemote: self.replacements = [] for option in ctx.options.map_remote: try: - spec = parse_modify_spec(option, True) + spec = parse_modify_spec(option, True, False) except ValueError as e: raise exceptions.OptionsError(f"Cannot parse map_remote option {option}: {e}") from e diff --git a/mitmproxy/addons/modifybody.py b/mitmproxy/addons/modifybody.py index 78d09f90b..7201f5984 100644 --- a/mitmproxy/addons/modifybody.py +++ b/mitmproxy/addons/modifybody.py @@ -25,7 +25,7 @@ class ModifyBody: self.replacements = [] for option in ctx.options.modify_body: try: - spec = parse_modify_spec(option, True) + spec = parse_modify_spec(option, True, False) except ValueError as e: raise exceptions.OptionsError(f"Cannot parse modify_body option {option}: {e}") from e diff --git a/mitmproxy/addons/modifyheaders.py b/mitmproxy/addons/modifyheaders.py index addac643a..50fae906d 100644 --- a/mitmproxy/addons/modifyheaders.py +++ b/mitmproxy/addons/modifyheaders.py @@ -33,7 +33,7 @@ def _match_all(flow) -> bool: return True -def parse_modify_spec(option, subject_is_regex: bool) -> ModifySpec: +def parse_modify_spec(option, subject_is_regex: bool, replacement_is_path: bool) -> ModifySpec: """ The form for the 
modify_* options is as follows: @@ -78,12 +78,20 @@ def parse_modify_spec(option, subject_is_regex: bool) -> ModifySpec: except re.error as e: raise ValueError(f"Invalid regular expression {subject!r} ({e})") + if replacement_is_path: + path = Path(replacement) + try: + replacement = path.expanduser().resolve(strict=True) + except FileNotFoundError as e: + raise ValueError(f"Invalid file path: {replacement} ({e})") + spec = ModifySpec(flow_filter, subject, replacement) - try: - spec.read_replacement() - except IOError as e: - raise ValueError(f"Invalid file path: {replacement[1:]} ({e})") + if not replacement_is_path: + try: + spec.read_replacement() + except IOError as e: + raise ValueError(f"Invalid file path: {replacement[1:]} ({e})") return spec @@ -107,7 +115,7 @@ class ModifyHeaders: if "modify_headers" in updated: for option in ctx.options.modify_headers: try: - spec = parse_modify_spec(option, False) + spec = parse_modify_spec(option, False, False) except ValueError as e: raise exceptions.OptionsError(f"Cannot parse modify_headers option {option}: {e}") from e self.replacements.append(spec) diff --git a/mitmproxy/tools/cmdline.py b/mitmproxy/tools/cmdline.py index 488cdb69b..eac1561c0 100644 --- a/mitmproxy/tools/cmdline.py +++ b/mitmproxy/tools/cmdline.py @@ -85,6 +85,10 @@ def common_options(parser, opts): group = parser.add_argument_group("Map Remote") opts.make_parser(group, "map_remote", metavar="PATTERN", short="M") + # Map Local + group = parser.add_argument_group("Map Local") + opts.make_parser(group, "map_local", metavar="PATTERN") + # Modify Body group = parser.add_argument_group("Modify Body") opts.make_parser(group, "modify_body", metavar="PATTERN", short="B") diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py new file mode 100644 index 000000000..f0e530524 --- /dev/null +++ b/test/mitmproxy/addons/test_maplocal.py @@ -0,0 +1,34 @@ +import pytest +from pathlib import Path + +from 
mitmproxy.addons.maplocal import MapLocal +from mitmproxy.test import taddons +from mitmproxy.test import tflow + +from mitmproxy.addons.modifyheaders import parse_modify_spec + + +class TestMapLocal: + def test_file_candidates(self, tmpdir): + ml = MapLocal() + + url = "https://example.org/img/topic/subtopic/test.jpg" + spec = parse_modify_spec(":/img/jpg:" + str(tmpdir), True, True) + file_candidates = ml.file_candidates(url, spec) + assert file_candidates[0] == str(tmpdir) + "/img/topic/subtopic/test.jpg" + assert file_candidates[1] == str(tmpdir) + "/img/topic/test.jpg" + assert file_candidates[2] == str(tmpdir) + "/img/test.jpg" + assert file_candidates[3] == str(tmpdir) + "/test.jpg" + + url = "https://example.org/img/topic/subtopic/" + spec = parse_modify_spec(":/img:" + str(tmpdir), True, True) + file_candidates = ml.file_candidates(url, spec) + assert file_candidates[0] == str(tmpdir) + "/img/topic/subtopic/index.html" + assert file_candidates[1] == str(tmpdir) + "/img/topic/index.html" + assert file_candidates[2] == str(tmpdir) + "/img/index.html" + assert file_candidates[3] == str(tmpdir) + "/index.html" + + url = "https://example.org" + spec = parse_modify_spec(":org:" + str(tmpdir), True, True) + file_candidates = ml.file_candidates(url, spec) + assert file_candidates[0] == str(tmpdir) + "/index.html" diff --git a/test/mitmproxy/addons/test_modifyheaders.py b/test/mitmproxy/addons/test_modifyheaders.py index f3ebd279f..42281322c 100644 --- a/test/mitmproxy/addons/test_modifyheaders.py +++ b/test/mitmproxy/addons/test_modifyheaders.py @@ -7,32 +7,32 @@ from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifyHeaders def test_parse_modify_spec(): - spec = parse_modify_spec("/foo/bar/voing", True) + spec = parse_modify_spec("/foo/bar/voing", True, False) assert spec.matches.pattern == "foo" assert spec.subject == b"bar" assert spec.read_replacement() == b"voing" - spec = parse_modify_spec("/foo/bar/vo/ing/", False) + spec = 
parse_modify_spec("/foo/bar/vo/ing/", False, False) assert spec.matches.pattern == "foo" assert spec.subject == b"bar" assert spec.read_replacement() == b"vo/ing/" - spec = parse_modify_spec("/bar/voing", False) + spec = parse_modify_spec("/bar/voing", False, False) assert spec.matches(tflow.tflow()) assert spec.subject == b"bar" assert spec.read_replacement() == b"voing" with pytest.raises(ValueError, match="Invalid number of parameters"): - parse_modify_spec("/", False) + parse_modify_spec("/", False, False) with pytest.raises(ValueError, match="Invalid filter pattern"): - parse_modify_spec("/~b/one/two", False) + parse_modify_spec("/~b/one/two", False, False) with pytest.raises(ValueError, match="Invalid filter pattern"): - parse_modify_spec("/~b/one/two", False) + parse_modify_spec("/~b/one/two", False, False) with pytest.raises(ValueError, match="Invalid regular expression"): - parse_modify_spec("/[/two", True) + parse_modify_spec("/[/two", True, False) class TestModifyHeaders: From 7022a54737688318736aeda8d88a16b2f8207a21 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Tue, 7 Jul 2020 23:19:29 +0200 Subject: [PATCH 02/22] maplocal addon: add mimetype guessing --- mitmproxy/addons/maplocal.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index b919621a3..b50965a37 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -1,3 +1,4 @@ +import mimetypes import re import typing import urllib @@ -77,6 +78,14 @@ class MapLocal: return candidates + def get_mime_type(self, file_path): + mimetype = ( + mimetypes.guess_type(file_path)[0] + or "text/plain" + ) + return mimetype + + def request(self, flow: http.HTTPFlow) -> None: if flow.reply and flow.reply.has_message: return @@ -84,8 +93,10 @@ class MapLocal: req = flow.request url = req.pretty_url base_path = Path(spec.replacement_str) + if spec.matches(flow) and re.search(spec.subject, 
url.encode("utf8", "surrogateescape")): file_candidates = self.file_candidates(url, spec) + for file_candidate in file_candidates: file_candidate = Path(file_candidate) if self.sanitize_candidate_path(file_candidate, base_path): @@ -97,8 +108,7 @@ class MapLocal: return flow.response = http.HTTPResponse.make( - 200, # (optional) status code - replacement, # (optional) content - # todo: guess mime type - {"Content-Type": "image/jpeg"} # (optional) headers + 200, + replacement, + {"Content-Type": self.get_mime_type(str(file_candidate))} ) From b1609697cdf107bbf4f422e3d0a1cd61097fc7f4 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Wed, 8 Jul 2020 01:34:14 +0200 Subject: [PATCH 03/22] maplocal addon: minor improvements --- mitmproxy/addons/maplocal.py | 24 ++++++++++-------------- mitmproxy/addons/modifyheaders.py | 8 ++++---- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index b50965a37..cc1c2a9f1 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -41,15 +41,15 @@ class MapLocal: def sanitize_candidate_path(self, candidate_path, base_path): try: - candidate_path.resolve(strict=True) - if base_path in candidate_path.parents: + candidate_path = candidate_path.resolve(strict=True) + if base_path == candidate_path or base_path in candidate_path.parents: return candidate_path except FileNotFoundError: pass return None def file_candidates(self, url: str, spec: ModifySpec) -> typing.List[Path]: - replacement = spec.replacement_str + replacement = spec.replacement candidates = [] if replacement.is_file(): @@ -85,30 +85,26 @@ class MapLocal: ) return mimetype - def request(self, flow: http.HTTPFlow) -> None: if flow.reply and flow.reply.has_message: return for spec in self.replacements: req = flow.request url = req.pretty_url - base_path = Path(spec.replacement_str) + base_path = Path(spec.replacement) if spec.matches(flow) and re.search(spec.subject, 
url.encode("utf8", "surrogateescape")): file_candidates = self.file_candidates(url, spec) - + for file_candidate in file_candidates: file_candidate = Path(file_candidate) if self.sanitize_candidate_path(file_candidate, base_path): try: - with open(file_candidate, "rb") as file: - replacement = file.read() + flow.response = http.HTTPResponse.make( + 200, + file_candidate.read_bytes(), + {"Content-Type": self.get_mime_type(str(file_candidate))} + ) except IOError: ctx.log.warn(f"Could not read replacement file {file_candidate}") return - - flow.response = http.HTTPResponse.make( - 200, - replacement, - {"Content-Type": self.get_mime_type(str(file_candidate))} - ) diff --git a/mitmproxy/addons/modifyheaders.py b/mitmproxy/addons/modifyheaders.py index 50fae906d..590cf959a 100644 --- a/mitmproxy/addons/modifyheaders.py +++ b/mitmproxy/addons/modifyheaders.py @@ -12,7 +12,7 @@ from mitmproxy import ctx class ModifySpec(typing.NamedTuple): matches: flowfilter.TFilter subject: bytes - replacement_str: str + replacement: str def read_replacement(self) -> bytes: """ @@ -22,11 +22,11 @@ class ModifySpec(typing.NamedTuple): Raises: - IOError if the file cannot be read. """ - if self.replacement_str.startswith("@"): - return Path(self.replacement_str[1:]).expanduser().read_bytes() + if self.replacement.startswith("@"): + return Path(self.replacement[1:]).expanduser().read_bytes() else: # We could cache this at some point, but unlikely to be a problem. 
- return strutils.escaped_str_to_bytes(self.replacement_str) + return strutils.escaped_str_to_bytes(self.replacement) def _match_all(flow) -> bool: From 53644de82093a2b3fc810f422ed6455420dde80d Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Wed, 8 Jul 2020 01:34:48 +0200 Subject: [PATCH 04/22] maplocal addon: add some tests --- test/mitmproxy/addons/test_maplocal.py | 80 +++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py index f0e530524..5ecdc0031 100644 --- a/test/mitmproxy/addons/test_maplocal.py +++ b/test/mitmproxy/addons/test_maplocal.py @@ -1,4 +1,3 @@ -import pytest from pathlib import Path from mitmproxy.addons.maplocal import MapLocal @@ -32,3 +31,82 @@ class TestMapLocal: spec = parse_modify_spec(":org:" + str(tmpdir), True, True) file_candidates = ml.file_candidates(url, spec) assert file_candidates[0] == str(tmpdir) + "/index.html" + + def test_sanitize_candidate_path(self, tmpdir): + base_dir = Path(str(tmpdir)) + + tmpdir.join("testdir1", "testdir2", "testdir3", "testdir4", "testfile").write("bar", ensure=True) + + ml = MapLocal() + assert ml.sanitize_candidate_path( + base_dir.joinpath("..", "bar"), base_dir + ) is None + assert ml.sanitize_candidate_path( + base_dir.joinpath(".."), base_dir + ) is None + assert ml.sanitize_candidate_path( + base_dir.joinpath("..", ".."), base_dir + ) is None + assert ml.sanitize_candidate_path( + base_dir.joinpath("..", "..", "..", "..", "..", "..", "etc", "passwd"), base_dir + ) is None + + assert ml.sanitize_candidate_path( + base_dir.joinpath("testdir1"), base_dir + ) is not None + assert ml.sanitize_candidate_path( + base_dir.joinpath("testdir1", "testdir2"), base_dir + ) is not None + assert ml.sanitize_candidate_path( + base_dir.joinpath("testdir1", "testdir2", "testdir3", "testdir4", "testfile"), base_dir + ) is not None + assert ml.sanitize_candidate_path( + base_dir.joinpath("testdir1", 
"testdir2", "testdir3", "testdir4", "testfile"), + base_dir.joinpath("testdir1", "testdir2", "testdir3", "testdir4", "testfile") + ) is not None + + def test_modify_headers(self, tmpdir): + ml = MapLocal() + + with taddons.context(ml) as tctx: + tmpfile = tmpdir.join("test1.jpg") + tmpfile.write("local content 1") + + tctx.configure( + ml, + map_local=[ + ":jpg:" + str(tmpdir) + ] + ) + f = tflow.tflow() + f.request.url = b"https://example.org/images/test1.jpg" + ml.request(f) + assert f.response.content == b"local content 1" + + tmpfile = tmpdir.join("images", "test2.jpg") + tmpfile.write("local content 2", ensure=True) + + tctx.configure( + ml, + map_local=[ + ":jpg:" + str(tmpdir) + ] + ) + f = tflow.tflow() + f.request.url = b"https://example.org/images/test2.jpg" + ml.request(f) + assert f.response.content == b"local content 2" + + tmpfile = tmpdir.join("images", "test3.jpg") + tmpfile.write("local content 3", ensure=True) + + tctx.configure( + ml, + map_local=[ + ":jpg:" + str(tmpfile) + ] + ) + f = tflow.tflow() + f.request.url = b"https://example.org/images/test3.jpg" + ml.request(f) + assert f.response.content == b"local content 3" From 55bba9265393a6f165dede7f8cf34df59ccaa663 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Wed, 8 Jul 2020 23:39:44 +0200 Subject: [PATCH 05/22] maplocal addon: improvements --- mitmproxy/addons/maplocal.py | 132 ++++++++++++++---------------- mitmproxy/addons/mapremote.py | 10 +-- mitmproxy/addons/modifybody.py | 3 +- mitmproxy/addons/modifyheaders.py | 19 +++-- 4 files changed, 79 insertions(+), 85 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index cc1c2a9f1..f8f509631 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -3,13 +3,47 @@ import re import typing import urllib from pathlib import Path +from werkzeug.security import safe_join -from mitmproxy import exceptions -from mitmproxy import ctx -from mitmproxy import http +from mitmproxy import ctx, 
exceptions, http from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec +def get_mime_type(file_path: str) -> str: + mimetype = ( + mimetypes.guess_type(file_path)[0] + or "application/octet-stream" + ) + return mimetype + + +def file_candidates(url: str, base_path: str) -> typing.List[Path]: + candidates = [] + parsed_url = urllib.parse.urlparse(url) + path_components = parsed_url.path.lstrip("/").split("/") + filename = path_components.pop() + + # todo: we may want to consider other filenames such as index.htm) + if not filename: + filename = 'index.html' + + # construct all possible paths + while True: + components_with_filename = tuple(path_components + [filename]) + candidate_path = safe_join(base_path, *components_with_filename) + if candidate_path: + candidates.append( + Path(candidate_path) + ) + + if not path_components: + break + + path_components.pop() + + return candidates + + class MapLocal: def __init__(self): self.replacements: typing.List[ModifySpec] = [] @@ -18,9 +52,9 @@ class MapLocal: loader.add_option( "map_local", typing.Sequence[str], [], """ - Replacement pattern of the form "[/flow-filter]/regex/file-or-directory", where - the separator can be any character. The @ allows to provide a file path that - is used to read the replacement string. + Map remote resources to a local file using a pattern of the form + "[/flow-filter]/url-regex/file-or-directory-path", where the + separator can be any character. 
""" ) @@ -35,76 +69,36 @@ class MapLocal: self.replacements.append(spec) - def construct_candidate_path(self, base_path, path_components, filename): - candidate_path = base_path.joinpath("/".join(path_components + [filename])) - return str(candidate_path) - - def sanitize_candidate_path(self, candidate_path, base_path): - try: - candidate_path = candidate_path.resolve(strict=True) - if base_path == candidate_path or base_path in candidate_path.parents: - return candidate_path - except FileNotFoundError: - pass - return None - - def file_candidates(self, url: str, spec: ModifySpec) -> typing.List[Path]: - replacement = spec.replacement - candidates = [] - - if replacement.is_file(): - candidates.append(replacement) - - elif replacement.is_dir(): - parsed_url = urllib.parse.urlparse(url) - - path_components = parsed_url.path.lstrip("/").split("/") - filename = path_components.pop() - - # todo: this can be improved (e.g., also consider index.htm) - if not filename: - filename = 'index.html' - - # construct all possible paths - while True: - candidates.append( - self.construct_candidate_path(replacement, path_components, filename) - ) - - if not path_components: - break - - path_components.pop() - - return candidates - - def get_mime_type(self, file_path): - mimetype = ( - mimetypes.guess_type(file_path)[0] - or "text/plain" - ) - return mimetype - def request(self, flow: http.HTTPFlow) -> None: if flow.reply and flow.reply.has_message: return + for spec in self.replacements: req = flow.request url = req.pretty_url base_path = Path(spec.replacement) if spec.matches(flow) and re.search(spec.subject, url.encode("utf8", "surrogateescape")): - file_candidates = self.file_candidates(url, spec) + replacement_path = None + if base_path.is_file(): + replacement_path = base_path + elif base_path.is_dir(): + candidates = file_candidates(url, str(base_path)) + for candidate in candidates: + # check that path is not outside of the user-defined base_path + if candidate.is_file() 
and base_path in candidate.parents: + replacement_path = candidate + break - for file_candidate in file_candidates: - file_candidate = Path(file_candidate) - if self.sanitize_candidate_path(file_candidate, base_path): - try: - flow.response = http.HTTPResponse.make( - 200, - file_candidate.read_bytes(), - {"Content-Type": self.get_mime_type(str(file_candidate))} - ) - except IOError: - ctx.log.warn(f"Could not read replacement file {file_candidate}") - return + if replacement_path: + try: + flow.response = http.HTTPResponse.make( + 200, + replacement_path.read_bytes(), + {"Content-Type": get_mime_type(str(replacement_path))} + ) + # only set flow.response once, for the first matching rule + break + except IOError: + ctx.log.warn(f"Could not read replacement file {replacement_path}") + return diff --git a/mitmproxy/addons/mapremote.py b/mitmproxy/addons/mapremote.py index fb8503a7d..b6d331920 100644 --- a/mitmproxy/addons/mapremote.py +++ b/mitmproxy/addons/mapremote.py @@ -1,8 +1,7 @@ import re import typing -from mitmproxy import exceptions, http -from mitmproxy import ctx +from mitmproxy import ctx, exceptions, http from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec @@ -14,9 +13,10 @@ class MapRemote: loader.add_option( "map_remote", typing.Sequence[str], [], """ - Replacement pattern of the form "[/flow-filter]/regex/[@]replacement", where - the separator can be any character. The @ allows to provide a file path that - is used to read the replacement string. + Map remote resources to another remote URL using a pattern of the form + "[/flow-filter]/url-regex/[@]replacement", where the separator can + be any character. The @ allows to provide a file path that is + used to read the replacement string. 
""" ) diff --git a/mitmproxy/addons/modifybody.py b/mitmproxy/addons/modifybody.py index 7201f5984..9ec6e88e4 100644 --- a/mitmproxy/addons/modifybody.py +++ b/mitmproxy/addons/modifybody.py @@ -1,8 +1,7 @@ import re import typing -from mitmproxy import exceptions -from mitmproxy import ctx +from mitmproxy import ctx, exceptions, http from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec diff --git a/mitmproxy/addons/modifyheaders.py b/mitmproxy/addons/modifyheaders.py index 590cf959a..030ef5de9 100644 --- a/mitmproxy/addons/modifyheaders.py +++ b/mitmproxy/addons/modifyheaders.py @@ -2,11 +2,9 @@ import re import typing from pathlib import Path -from mitmproxy import exceptions, http -from mitmproxy import flowfilter +from mitmproxy import ctx, exceptions, flowfilter, http from mitmproxy.net.http import Headers from mitmproxy.utils import strutils -from mitmproxy import ctx class ModifySpec(typing.NamedTuple): @@ -35,13 +33,16 @@ def _match_all(flow) -> bool: def parse_modify_spec(option, subject_is_regex: bool, replacement_is_path: bool) -> ModifySpec: """ - The form for the modify_* options is as follows: + The form for the modify_*, map_remote, and map_local options is as follows: + * modify_body: [/flow-filter]/body-regex/[@]replacement * modify_headers: [/flow-filter]/header-name/[@]header-value - * modify_body: [/flow-filter]/search-regex/[@]replace + * map_local: [:flow-filter]:url-regex:path + * map_remote: [:flow-filter]:url-regex:[@]replacement The @ allows to provide a file path that is used to read the respective option. - Both ModifyHeaders and ModifyBody use ModifySpec to represent a single rule. + The addons ModifyHeaders, ModifyBody, MapRemote, and MapLocal use ModifySpec + to represent a single rule. The first character specifies the separator. Example: @@ -53,8 +54,8 @@ def parse_modify_spec(option, subject_is_regex: bool, replacement_is_path: bool) /foo/bar Clauses are parsed from left to right. 
Extra separators are taken to be - part of the final clause. For instance, the last parameter (header-value or - replace) below is "foo/bar/": + part of the final clause. For instance, the last parameter (header-value, + replace, or path) below is "foo/bar/": /one/two/foo/bar/ """ @@ -81,7 +82,7 @@ def parse_modify_spec(option, subject_is_regex: bool, replacement_is_path: bool) if replacement_is_path: path = Path(replacement) try: - replacement = path.expanduser().resolve(strict=True) + replacement = str(path.expanduser().resolve(strict=True)) except FileNotFoundError as e: raise ValueError(f"Invalid file path: {replacement} ({e})") From 41c99810ef4cf00679394d6b1f812af40329e174 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Wed, 8 Jul 2020 23:52:22 +0200 Subject: [PATCH 06/22] maplocal addon: improve tests --- test/mitmproxy/addons/test_maplocal.py | 105 +++++++++---------------- 1 file changed, 35 insertions(+), 70 deletions(-) diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py index 5ecdc0031..03e5d4626 100644 --- a/test/mitmproxy/addons/test_maplocal.py +++ b/test/mitmproxy/addons/test_maplocal.py @@ -1,77 +1,44 @@ -from pathlib import Path +import pytest -from mitmproxy.addons.maplocal import MapLocal +from mitmproxy.addons.maplocal import MapLocal, file_candidates from mitmproxy.test import taddons from mitmproxy.test import tflow from mitmproxy.addons.modifyheaders import parse_modify_spec +@pytest.mark.parametrize( + "url,spec,expected_candidates", [ + ( + "https://example.org/img/topic/subtopic/test.jpg", + ":example.com/foo:/tmp", + ["/tmp/img/topic/subtopic/test.jpg", "/tmp/img/topic/test.jpg", "/tmp/img/test.jpg", "/tmp/test.jpg"] + ), + ( + "https://example.org/img/topic/subtopic/", + ":/img:/tmp", + ["/tmp/img/topic/subtopic/index.html", "/tmp/img/topic/index.html", "/tmp/img/index.html", "/tmp/index.html"] + ), + ( + "https://example.org", + ":org:/tmp", + ["/tmp/index.html"] + ), + ] +) +def 
test_file_candidates(url, spec, expected_candidates): + spec = parse_modify_spec(spec, True, True) + candidates = file_candidates(url, spec.replacement) + assert [str(x) for x in candidates] == expected_candidates + + class TestMapLocal: - def test_file_candidates(self, tmpdir): - ml = MapLocal() - - url = "https://example.org/img/topic/subtopic/test.jpg" - spec = parse_modify_spec(":/img/jpg:" + str(tmpdir), True, True) - file_candidates = ml.file_candidates(url, spec) - assert file_candidates[0] == str(tmpdir) + "/img/topic/subtopic/test.jpg" - assert file_candidates[1] == str(tmpdir) + "/img/topic/test.jpg" - assert file_candidates[2] == str(tmpdir) + "/img/test.jpg" - assert file_candidates[3] == str(tmpdir) + "/test.jpg" - - url = "https://example.org/img/topic/subtopic/" - spec = parse_modify_spec(":/img:" + str(tmpdir), True, True) - file_candidates = ml.file_candidates(url, spec) - assert file_candidates[0] == str(tmpdir) + "/img/topic/subtopic/index.html" - assert file_candidates[1] == str(tmpdir) + "/img/topic/index.html" - assert file_candidates[2] == str(tmpdir) + "/img/index.html" - assert file_candidates[3] == str(tmpdir) + "/index.html" - - url = "https://example.org" - spec = parse_modify_spec(":org:" + str(tmpdir), True, True) - file_candidates = ml.file_candidates(url, spec) - assert file_candidates[0] == str(tmpdir) + "/index.html" - - def test_sanitize_candidate_path(self, tmpdir): - base_dir = Path(str(tmpdir)) - - tmpdir.join("testdir1", "testdir2", "testdir3", "testdir4", "testfile").write("bar", ensure=True) - - ml = MapLocal() - assert ml.sanitize_candidate_path( - base_dir.joinpath("..", "bar"), base_dir - ) is None - assert ml.sanitize_candidate_path( - base_dir.joinpath(".."), base_dir - ) is None - assert ml.sanitize_candidate_path( - base_dir.joinpath("..", ".."), base_dir - ) is None - assert ml.sanitize_candidate_path( - base_dir.joinpath("..", "..", "..", "..", "..", "..", "etc", "passwd"), base_dir - ) is None - - assert 
ml.sanitize_candidate_path( - base_dir.joinpath("testdir1"), base_dir - ) is not None - assert ml.sanitize_candidate_path( - base_dir.joinpath("testdir1", "testdir2"), base_dir - ) is not None - assert ml.sanitize_candidate_path( - base_dir.joinpath("testdir1", "testdir2", "testdir3", "testdir4", "testfile"), base_dir - ) is not None - assert ml.sanitize_candidate_path( - base_dir.joinpath("testdir1", "testdir2", "testdir3", "testdir4", "testfile"), - base_dir.joinpath("testdir1", "testdir2", "testdir3", "testdir4", "testfile") - ) is not None - - def test_modify_headers(self, tmpdir): + def test_map_local(self, tmpdir): ml = MapLocal() with taddons.context(ml) as tctx: tmpfile = tmpdir.join("test1.jpg") - tmpfile.write("local content 1") - + tmpfile.write("foo") tctx.configure( ml, map_local=[ @@ -81,11 +48,10 @@ class TestMapLocal: f = tflow.tflow() f.request.url = b"https://example.org/images/test1.jpg" ml.request(f) - assert f.response.content == b"local content 1" + assert f.response.content == b"foo" tmpfile = tmpdir.join("images", "test2.jpg") - tmpfile.write("local content 2", ensure=True) - + tmpfile.write("bar", ensure=True) tctx.configure( ml, map_local=[ @@ -95,11 +61,10 @@ class TestMapLocal: f = tflow.tflow() f.request.url = b"https://example.org/images/test2.jpg" ml.request(f) - assert f.response.content == b"local content 2" + assert f.response.content == b"bar" tmpfile = tmpdir.join("images", "test3.jpg") - tmpfile.write("local content 3", ensure=True) - + tmpfile.write("foobar", ensure=True) tctx.configure( ml, map_local=[ @@ -107,6 +72,6 @@ class TestMapLocal: ] ) f = tflow.tflow() - f.request.url = b"https://example.org/images/test3.jpg" + f.request.url = b"https://example.org/foo.jpg" ml.request(f) - assert f.response.content == b"local content 3" + assert f.response.content == b"foobar" From 8942ae88d507e4ed890ab35be69e37226d2f9511 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Wed, 8 Jul 2020 23:53:44 +0200 Subject: [PATCH 07/22] 
maplocal addon: update docs --- docs/src/content/overview-features.md | 61 +++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/docs/src/content/overview-features.md b/docs/src/content/overview-features.md index b5bb7bc64..d87f99371 100644 --- a/docs/src/content/overview-features.md +++ b/docs/src/content/overview-features.md @@ -11,6 +11,7 @@ menu: - [Anticache](#anticache) - [Client-side replay](#client-side-replay) +- [Map Local](#map-local) - [Map Remote](#map-remote) - [Modify Body](#modify-body) - [Modify Headers](#modify-headers) @@ -43,6 +44,58 @@ You may want to use client-side replay in conjunction with the `anticache` option, to make sure the server responds with complete data. +## Map Local + +The `map_local` option lets you specify an arbitrary number of patterns that +define redirections of HTTP requests to local files or diretories. +The local file is fetched instead of the original resource +and the corresponding HTTP response is returned transparently to the client. +The mime type of the local file is guessed to set the `Content-Type` header. +`map_local` patterns looks like this: + +``` +|flow-filter|url-regex|file-path +|flow-filter|url-regex|diretory-path +|url-regex|file-path +|url-regex|diretory-path +``` + +* **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) +that defines which requests the `map_local` option applies to. + +* **url-regex** is a valid Python regular expression on the request URL that defines which requests the `map_local` option applies to. + +* **file-path** is a path to a file that is served instead of the original resource. + +* **diretory-path** is a path to a directory that is used to look for the resource +to serve instead of the original resource. mitmproxy tries to select the correct file +within **diretory-path** automatically. 
It first tries `diretory-path/url-path` and +strips the deepest directory repeatedly until it finds an existing file. +For example, with the **diretory-path** `/local` and the request URL `http://example.org/media/img/foo.jpg`, +mitmproxy looks for `/local/media/img/foo.jpg`, `/local/media/foo.jpg`, and `/local/foo.jpg`, +in this order. If no file is found, the original resource is served instead. + +### Examples + +Map all requests for `example.org/css/*` to the local directory `~/local-css`. + +``` +|//example.org/css/|~/local-css +``` + +Map all requests for `example.org/js/main.js` to the local file `~/main-local.js`. + +``` +|example.org/js/main.js|~/main-local.js +``` + +Map all requests ending with `.jpg` to the local file `~/foo.jpg`. + +``` +|.*\.jpg$|~/foo.jpg +``` + + ## Map Remote The `map_remote` option lets you specify an arbitrary number of patterns that @@ -54,16 +107,16 @@ needs to support HTTP2 as well, otherwise the substituted request may fail. `map_remote` patterns looks like this: ``` -|flow-filter|regex|replacement -|flow-filter|regex|@file-path +|flow-filter|url-regex|replacement +|flow-filter|url-regex|@file-path |regex|replacement |regex|@file-path ``` * **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) -that defines which requests a replacement applies to. +that defines which requests the `map_remote` option applies to. -* **regex** is a valid Python regular expression that defines what gets replaced in the URLs of requests. +* **url-regex** is a valid Python regular expression that defines what gets replaced in the URLs of requests. * **replacement** is a string literal that is substituted in. If the replacement string literal starts with `@` as in `@file-path`, it is treated as a **file path** from which the replacement is read. 
From eca6dc4d1c948528db49fcc524bff3b00411f9d1 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Wed, 8 Jul 2020 23:58:02 +0200 Subject: [PATCH 08/22] maplocal addon: fix linting --- mitmproxy/addons/modifybody.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mitmproxy/addons/modifybody.py b/mitmproxy/addons/modifybody.py index 9ec6e88e4..3106cd194 100644 --- a/mitmproxy/addons/modifybody.py +++ b/mitmproxy/addons/modifybody.py @@ -1,7 +1,7 @@ import re import typing -from mitmproxy import ctx, exceptions, http +from mitmproxy import ctx, exceptions from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec From fe1b76bdef8e0b081d0ec84ee07deb91c8304cac Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 10 Jul 2020 13:23:13 +0200 Subject: [PATCH 09/22] refactor spec parsing, map_local candidate generation --- mitmproxy/addons/maplocal.py | 130 +++++++++++++++---------- mitmproxy/addons/mapremote.py | 40 +++++--- mitmproxy/addons/modifybody.py | 2 +- mitmproxy/addons/modifyheaders.py | 100 ++++++++----------- test/mitmproxy/addons/test_maplocal.py | 64 ++++++++---- 5 files changed, 187 insertions(+), 149 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index f8f509631..32bc533b2 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -1,52 +1,86 @@ import mimetypes import re import typing -import urllib from pathlib import Path + from werkzeug.security import safe_join -from mitmproxy import ctx, exceptions, http -from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec +from mitmproxy import ctx, exceptions, flowfilter, http +from mitmproxy.addons.modifyheaders import parse_spec + + +class MapLocalSpec(typing.NamedTuple): + matches: flowfilter.TFilter + regex: str + local_path: Path + + +def parse_map_local_spec(option: str) -> MapLocalSpec: + filter, regex, replacement = parse_spec(option) + + try: + re.compile(regex) + except re.error as e: + 
raise ValueError(f"Invalid regular expression {regex!r} ({e})") + + try: + path = Path(replacement).expanduser().resolve(strict=True) + except FileNotFoundError as e: + raise ValueError(f"Invalid file path: {replacement} ({e})") + + return MapLocalSpec(filter, regex, path) def get_mime_type(file_path: str) -> str: mimetype = ( - mimetypes.guess_type(file_path)[0] - or "application/octet-stream" + mimetypes.guess_type(file_path)[0] + or "application/octet-stream" ) return mimetype -def file_candidates(url: str, base_path: str) -> typing.List[Path]: +def _safe_path_join(root: Path, untrusted: str) -> Path: + """Join a Path element with an untrusted str. + + This is just a convenience wrapper for werkzeug's safe_join.""" + untrusted_parts = Path(untrusted).parts + joined = safe_join( + root.as_posix(), + *untrusted_parts + ) + if joined is None: + raise ValueError("Untrusted paths.") + return Path(joined) + + +def file_candidates(url: str, spec: MapLocalSpec) -> typing.List[Path]: candidates = [] - parsed_url = urllib.parse.urlparse(url) - path_components = parsed_url.path.lstrip("/").split("/") - filename = path_components.pop() - # todo: we may want to consider other filenames such as index.htm) - if not filename: - filename = 'index.html' + m = re.search(spec.regex, url) + assert m + if m.groups(): + suffix = m.group(1) + else: + suffix = re.split(spec.regex, url, maxsplit=1)[1] + suffix = suffix.split("?")[0] # remove query string - # construct all possible paths - while True: - components_with_filename = tuple(path_components + [filename]) - candidate_path = safe_join(base_path, *components_with_filename) - if candidate_path: - candidates.append( - Path(candidate_path) - ) + suffix = re.sub(r"[^0-9a-zA-Z-_.=(),/]", "_", suffix.strip("/")) - if not path_components: - break - - path_components.pop() + if suffix: + try: + candidates.append(_safe_path_join(spec.local_path, suffix)) + candidates.append(_safe_path_join(spec.local_path, f"{suffix}/index.html")) + except 
ValueError: + return [] + else: + candidates.append(spec.local_path / "index.html") return candidates class MapLocal: def __init__(self): - self.replacements: typing.List[ModifySpec] = [] + self.replacements: typing.List[MapLocalSpec] = [] def load(self, loader): loader.add_option( @@ -63,7 +97,7 @@ class MapLocal: self.replacements = [] for option in ctx.options.map_local: try: - spec = parse_modify_spec(option, True, True) + spec = parse_map_local_spec(option) except ValueError as e: raise exceptions.OptionsError(f"Cannot parse map_local option {option}: {e}") from e @@ -74,31 +108,25 @@ class MapLocal: return for spec in self.replacements: - req = flow.request - url = req.pretty_url - base_path = Path(spec.replacement) + url = flow.request.pretty_url - if spec.matches(flow) and re.search(spec.subject, url.encode("utf8", "surrogateescape")): - replacement_path = None - if base_path.is_file(): - replacement_path = base_path - elif base_path.is_dir(): - candidates = file_candidates(url, str(base_path)) - for candidate in candidates: - # check that path is not outside of the user-defined base_path - if candidate.is_file() and base_path in candidate.parents: - replacement_path = candidate + if spec.matches(flow) and re.search(spec.regex, url): + + local_file: typing.Optional[Path] = None + + if spec.local_path.is_file(): + local_file = spec.local_path + elif spec.local_path.is_dir(): + for candidate in file_candidates(url, spec): + if candidate.is_file(): + local_file = candidate break - if replacement_path: - try: - flow.response = http.HTTPResponse.make( - 200, - replacement_path.read_bytes(), - {"Content-Type": get_mime_type(str(replacement_path))} - ) - # only set flow.response once, for the first matching rule - break - except IOError: - ctx.log.warn(f"Could not read replacement file {replacement_path}") - return + if local_file: + flow.response = http.HTTPResponse.make( + 200, + local_file.read_bytes(), + {"Content-Type": get_mime_type(str(local_file))} + ) + # 
only set flow.response once, for the first matching rule + return diff --git a/mitmproxy/addons/mapremote.py b/mitmproxy/addons/mapremote.py index b6d331920..8cd522b45 100644 --- a/mitmproxy/addons/mapremote.py +++ b/mitmproxy/addons/mapremote.py @@ -1,22 +1,38 @@ import re import typing -from mitmproxy import ctx, exceptions, http -from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifySpec +from mitmproxy import ctx, exceptions, flowfilter, http +from mitmproxy.addons.modifyheaders import parse_spec + + +class MapRemoteSpec(typing.NamedTuple): + matches: flowfilter.TFilter + subject: str + replacement: str + + +def parse_map_remote_spec(option: str) -> MapRemoteSpec: + spec = MapRemoteSpec(*parse_spec(option)) + + try: + re.compile(spec.subject) + except re.error as e: + raise ValueError(f"Invalid regular expression {spec.subject!r} ({e})") + + return spec class MapRemote: def __init__(self): - self.replacements: typing.List[ModifySpec] = [] + self.replacements: typing.List[MapRemoteSpec] = [] def load(self, loader): loader.add_option( "map_remote", typing.Sequence[str], [], """ Map remote resources to another remote URL using a pattern of the form - "[/flow-filter]/url-regex/[@]replacement", where the separator can - be any character. The @ allows to provide a file path that is - used to read the replacement string. + "[/flow-filter]/url-regex/replacement", where the separator can + be any character. 
""" ) @@ -25,7 +41,7 @@ class MapRemote: self.replacements = [] for option in ctx.options.map_remote: try: - spec = parse_modify_spec(option, True, False) + spec = parse_map_remote_spec(option) except ValueError as e: raise exceptions.OptionsError(f"Cannot parse map_remote option {option}: {e}") from e @@ -36,14 +52,8 @@ class MapRemote: return for spec in self.replacements: if spec.matches(flow): - try: - replacement = spec.read_replacement() - except IOError as e: - ctx.log.warn(f"Could not read replacement file: {e}") - continue - - url = flow.request.pretty_url.encode("utf8", "surrogateescape") - new_url = re.sub(spec.subject, replacement, url) + url = flow.request.pretty_url + new_url = re.sub(spec.subject, spec.replacement, url) # this is a bit messy: setting .url also updates the host header, # so we really only do that if the replacement affected the URL. if url != new_url: diff --git a/mitmproxy/addons/modifybody.py b/mitmproxy/addons/modifybody.py index 9ec6e88e4..d4a898f6f 100644 --- a/mitmproxy/addons/modifybody.py +++ b/mitmproxy/addons/modifybody.py @@ -24,7 +24,7 @@ class ModifyBody: self.replacements = [] for option in ctx.options.modify_body: try: - spec = parse_modify_spec(option, True, False) + spec = parse_modify_spec(option, True) except ValueError as e: raise exceptions.OptionsError(f"Cannot parse modify_body option {option}: {e}") from e diff --git a/mitmproxy/addons/modifyheaders.py b/mitmproxy/addons/modifyheaders.py index 030ef5de9..098ef8138 100644 --- a/mitmproxy/addons/modifyheaders.py +++ b/mitmproxy/addons/modifyheaders.py @@ -7,10 +7,36 @@ from mitmproxy.net.http import Headers from mitmproxy.utils import strutils +def _match_all(flow) -> bool: + return True + + +def parse_spec(option: str) -> typing.Tuple[flowfilter.TFilter, str, str]: + """ + Parse strings in the following format: + + [/flow-filter]/subject/replacement + + """ + sep, rem = option[0], option[1:] + parts = rem.split(sep, 2) + if len(parts) == 2: + subject, 
replacement = parts + return _match_all, subject, replacement + elif len(parts) == 3: + patt, subject, replacement = parts + flow_filter = flowfilter.parse(patt) + if not flow_filter: + raise ValueError(f"Invalid filter pattern: {patt}") + return flow_filter, subject, replacement + else: + raise ValueError("Invalid number of parameters (2 or 3 are expected)") + + class ModifySpec(typing.NamedTuple): matches: flowfilter.TFilter subject: bytes - replacement: str + replacement_str: str def read_replacement(self) -> bytes: """ @@ -20,79 +46,29 @@ class ModifySpec(typing.NamedTuple): Raises: - IOError if the file cannot be read. """ - if self.replacement.startswith("@"): - return Path(self.replacement[1:]).expanduser().read_bytes() + if self.replacement_str.startswith("@"): + return Path(self.replacement_str[1:]).expanduser().read_bytes() else: # We could cache this at some point, but unlikely to be a problem. - return strutils.escaped_str_to_bytes(self.replacement) + return strutils.escaped_str_to_bytes(self.replacement_str) -def _match_all(flow) -> bool: - return True +def parse_modify_spec(option: str, subject_is_regex: bool) -> ModifySpec: + flow_filter, subject_str, replacement = parse_spec(option) - -def parse_modify_spec(option, subject_is_regex: bool, replacement_is_path: bool) -> ModifySpec: - """ - The form for the modify_*, map_remote, and map_local options is as follows: - - * modify_body: [/flow-filter]/body-regex/[@]replacement - * modify_headers: [/flow-filter]/header-name/[@]header-value - * map_local: [:flow-filter]:url-regex:path - * map_remote: [:flow-filter]:url-regex:[@]replacement - - The @ allows to provide a file path that is used to read the respective option. - The addons ModifyHeaders, ModifyBody, MapRemote, and MapLocal use ModifySpec - to represent a single rule. - - The first character specifies the separator. Example: - - :~q:foo:bar - - If only two clauses are specified, the flow filter is set to - match universally (i.e. ".*"). 
Example: - - /foo/bar - - Clauses are parsed from left to right. Extra separators are taken to be - part of the final clause. For instance, the last parameter (header-value, - replace, or path) below is "foo/bar/": - - /one/two/foo/bar/ - """ - sep, rem = option[0], option[1:] - parts = rem.split(sep, 2) - if len(parts) == 2: - flow_filter = _match_all - subject, replacement = parts - elif len(parts) == 3: - flow_filter_pattern, subject, replacement = parts - flow_filter = flowfilter.parse(flow_filter_pattern) # type: ignore - if not flow_filter: - raise ValueError(f"Invalid filter pattern: {flow_filter_pattern}") - else: - raise ValueError("Invalid number of parameters (2 or 3 are expected)") - - subject = strutils.escaped_str_to_bytes(subject) + subject = strutils.escaped_str_to_bytes(subject_str) if subject_is_regex: try: re.compile(subject) except re.error as e: raise ValueError(f"Invalid regular expression {subject!r} ({e})") - if replacement_is_path: - path = Path(replacement) - try: - replacement = str(path.expanduser().resolve(strict=True)) - except FileNotFoundError as e: - raise ValueError(f"Invalid file path: {replacement} ({e})") - spec = ModifySpec(flow_filter, subject, replacement) - if not replacement_is_path: - try: - spec.read_replacement() - except IOError as e: - raise ValueError(f"Invalid file path: {replacement[1:]} ({e})") + try: + spec.read_replacement() + except IOError as e: + raise ValueError(f"Invalid file path: {replacement[1:]} ({e})") return spec @@ -116,7 +92,7 @@ class ModifyHeaders: if "modify_headers" in updated: for option in ctx.options.modify_headers: try: - spec = parse_modify_spec(option, False, False) + spec = parse_modify_spec(option, False) except ValueError as e: raise exceptions.OptionsError(f"Cannot parse modify_headers option {option}: {e}") from e self.replacements.append(spec) diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py index 03e5d4626..fddec873c 100644 --- 
a/test/mitmproxy/addons/test_maplocal.py +++ b/test/mitmproxy/addons/test_maplocal.py @@ -1,35 +1,59 @@ +import re +from pathlib import Path + import pytest -from mitmproxy.addons.maplocal import MapLocal, file_candidates +from mitmproxy.addons.maplocal import MapLocal, MapLocalSpec, file_candidates +from mitmproxy.addons.modifyheaders import parse_spec from mitmproxy.test import taddons from mitmproxy.test import tflow -from mitmproxy.addons.modifyheaders import parse_modify_spec - @pytest.mark.parametrize( - "url,spec,expected_candidates", [ + "url,spec,expected_candidates", + [ + # trailing slashes + ("https://example.com/foo", ":example.com/foo:/tmp", ["/tmp/index.html"]), + ("https://example.com/foo/", ":example.com/foo:/tmp", ["/tmp/index.html"]), + ("https://example.com/foo", ":example.com/foo:/tmp/", ["/tmp/index.html"]), + ] + [ + # simple prefixes + ("http://example/foo/bar.jpg", ":example/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), + ("https://example/foo/bar.jpg", ":example/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), + ("https://example/foo/bar.jpg?query", ":example/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), + ("https://example/foo/bar/baz.jpg", ":example/foo:/tmp", ["/tmp/bar/baz.jpg", "/tmp/bar/baz.jpg/index.html"]), + ] + [ + # index.html + ("https://example.com/foo", ":example.com/foo:/tmp", ["/tmp/index.html"]), + ("https://example.com/foo/", ":example.com/foo:/tmp", ["/tmp/index.html"]), + ("https://example.com/foo/bar", ":example.com/foo:/tmp", ["/tmp/bar", "/tmp/bar/index.html"]), + ("https://example.com/foo/bar/", ":example.com/foo:/tmp", ["/tmp/bar", "/tmp/bar/index.html"]), + ] + [ + # regex ( - "https://example.org/img/topic/subtopic/test.jpg", - ":example.com/foo:/tmp", - ["/tmp/img/topic/subtopic/test.jpg", "/tmp/img/topic/test.jpg", "/tmp/img/test.jpg", "/tmp/test.jpg"] - ), - ( - "https://example.org/img/topic/subtopic/", - ":/img:/tmp", - ["/tmp/img/topic/subtopic/index.html", 
"/tmp/img/topic/index.html", "/tmp/img/index.html", "/tmp/index.html"] - ), - ( - "https://example.org", - ":org:/tmp", - ["/tmp/index.html"] + "https://example/view.php?f=foo.jpg", + ":example/view.php\\?f=(.+):/tmp", + ["/tmp/foo.jpg", "/tmp/foo.jpg/index.html"] + ), ( + "https://example/results?id=1&foo=2", + ":example/(results\\?id=.+):/tmp", + ["/tmp/results_id=1_foo=2", "/tmp/results_id=1_foo=2/index.html"] ), + ] + [ + # test directory traversal detection + ("https://example.com/../../../../../../etc/passwd", ":example.com:/tmp", []), + # those get already sanitized to benign versions before they reach our detection: + ("https://example.com/C:\\foo.txt", ":example.com:/tmp", ["/tmp/C__foo.txt", "/tmp/C__foo.txt/index.html"]), + ("https://example.com//etc/passwd", ":example.com:/tmp", ["/tmp/etc/passwd", "/tmp/etc/passwd/index.html"]), ] ) def test_file_candidates(url, spec, expected_candidates): - spec = parse_modify_spec(spec, True, True) - candidates = file_candidates(url, spec.replacement) - assert [str(x) for x in candidates] == expected_candidates + # we circumvent the path existence checks here to simplify testing + filt, subj, repl = parse_spec(spec) + spec = MapLocalSpec(filt, subj, Path(repl)) + + candidates = file_candidates(url, spec) + assert [x.as_posix() for x in candidates] == expected_candidates class TestMapLocal: From 3bbcbacd68be297e0a0fdcc27ea60aed3f5f3d03 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 10 Jul 2020 14:28:12 +0200 Subject: [PATCH 10/22] map local: 404 if not found --- mitmproxy/addons/maplocal.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index 32bc533b2..a73fa54f4 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -54,8 +54,10 @@ def _safe_path_join(root: Path, untrusted: str) -> Path: def file_candidates(url: str, spec: MapLocalSpec) -> typing.List[Path]: - candidates = [] 
- + """ + Get all potential file candidates given a URL and a mapping spec ordered by preference. + This function already assumes that the spec regex matches the URL. + """ m = re.search(spec.regex, url) assert m if m.groups(): @@ -64,18 +66,18 @@ def file_candidates(url: str, spec: MapLocalSpec) -> typing.List[Path]: suffix = re.split(spec.regex, url, maxsplit=1)[1] suffix = suffix.split("?")[0] # remove query string - suffix = re.sub(r"[^0-9a-zA-Z-_.=(),/]", "_", suffix.strip("/")) + suffix = re.sub(r"[^0-9a-zA-Z\-_.=(),/]", "_", suffix.strip("/")) if suffix: try: - candidates.append(_safe_path_join(spec.local_path, suffix)) - candidates.append(_safe_path_join(spec.local_path, f"{suffix}/index.html")) + return [ + _safe_path_join(spec.local_path, suffix), + _safe_path_join(spec.local_path, f"{suffix}/index.html") + ] except ValueError: return [] else: - candidates.append(spec.local_path / "index.html") - - return candidates + return [spec.local_path / "index.html"] class MapLocal: @@ -107,10 +109,12 @@ class MapLocal: if flow.reply and flow.reply.has_message: return - for spec in self.replacements: - url = flow.request.pretty_url + url = flow.request.pretty_url + any_spec_matches = False + for spec in self.replacements: if spec.matches(flow) and re.search(spec.regex, url): + any_spec_matches = True local_file: typing.Optional[Path] = None @@ -130,3 +134,5 @@ class MapLocal: ) # only set flow.response once, for the first matching rule return + if any_spec_matches: + flow.response = http.HTTPResponse.make(404) From c3894b0f58fd7a9b24f467a44a187df41299453f Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 10 Jul 2020 14:29:51 +0200 Subject: [PATCH 11/22] map local: minor docs improvements --- mitmproxy/addons/maplocal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index a73fa54f4..96349fd72 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -42,7 
+42,8 @@ def get_mime_type(file_path: str) -> str: def _safe_path_join(root: Path, untrusted: str) -> Path: """Join a Path element with an untrusted str. - This is just a convenience wrapper for werkzeug's safe_join.""" + This is a convenience wrapper for werkzeug's safe_join, + raising a ValueError if the path is malformed.""" untrusted_parts = Path(untrusted).parts joined = safe_join( root.as_posix(), From 257c178bbe09c647b35126c42be0343773e51daf Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 10 Jul 2020 20:44:23 +0200 Subject: [PATCH 12/22] map local: fix tests --- test/mitmproxy/addons/test_maplocal.py | 32 ++++++++--------- test/mitmproxy/addons/test_mapremote.py | 38 --------------------- test/mitmproxy/addons/test_modifyheaders.py | 14 ++++---- 3 files changed, 23 insertions(+), 61 deletions(-) diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py index fddec873c..fa10df1aa 100644 --- a/test/mitmproxy/addons/test_maplocal.py +++ b/test/mitmproxy/addons/test_maplocal.py @@ -1,4 +1,3 @@ -import re from pathlib import Path import pytest @@ -18,10 +17,11 @@ from mitmproxy.test import tflow ("https://example.com/foo", ":example.com/foo:/tmp/", ["/tmp/index.html"]), ] + [ # simple prefixes - ("http://example/foo/bar.jpg", ":example/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), - ("https://example/foo/bar.jpg", ":example/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), - ("https://example/foo/bar.jpg?query", ":example/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), - ("https://example/foo/bar/baz.jpg", ":example/foo:/tmp", ["/tmp/bar/baz.jpg", "/tmp/bar/baz.jpg/index.html"]), + ("http://example.com/foo/bar.jpg", ":example.com/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), + ("https://example.com/foo/bar.jpg", ":example.com/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), + ("https://example.com/foo/bar.jpg?query", ":example.com/foo:/tmp", ["/tmp/bar.jpg", 
"/tmp/bar.jpg/index.html"]), + ("https://example.com/foo/bar/baz.jpg", ":example.com/foo:/tmp", ["/tmp/bar/baz.jpg", "/tmp/bar/baz.jpg/index.html"]), + ("https://example.com/foo/bar.jpg", ":/foo/bar.jpg:/tmp", ["/tmp/index.html"]), ] + [ # index.html ("https://example.com/foo", ":example.com/foo:/tmp", ["/tmp/index.html"]), @@ -61,41 +61,41 @@ class TestMapLocal: ml = MapLocal() with taddons.context(ml) as tctx: - tmpfile = tmpdir.join("test1.jpg") + tmpfile = tmpdir.join("foo.jpg") tmpfile.write("foo") tctx.configure( ml, map_local=[ - ":jpg:" + str(tmpdir) + "://example.org/images:" + str(tmpdir) ] ) f = tflow.tflow() - f.request.url = b"https://example.org/images/test1.jpg" + f.request.url = b"https://example.org/images/foo.jpg" ml.request(f) assert f.response.content == b"foo" - tmpfile = tmpdir.join("images", "test2.jpg") + tmpfile = tmpdir.join("images", "bar.jpg") tmpfile.write("bar", ensure=True) tctx.configure( ml, map_local=[ - ":jpg:" + str(tmpdir) + "://example.org:" + str(tmpdir) ] ) f = tflow.tflow() - f.request.url = b"https://example.org/images/test2.jpg" + f.request.url = b"https://example.org/images/bar.jpg" ml.request(f) assert f.response.content == b"bar" - tmpfile = tmpdir.join("images", "test3.jpg") - tmpfile.write("foobar", ensure=True) + tmpfile = tmpdir.join("foofoobar.jpg") + tmpfile.write("foofoobar", ensure=True) tctx.configure( ml, map_local=[ - ":jpg:" + str(tmpfile) + ":example.org/foo/foo/bar.jpg:" + str(tmpfile) ] ) f = tflow.tflow() - f.request.url = b"https://example.org/foo.jpg" + f.request.url = b"https://example.org/foo/foo/bar.jpg" ml.request(f) - assert f.response.content == b"foobar" + assert f.response.content == b"foofoobar" diff --git a/test/mitmproxy/addons/test_mapremote.py b/test/mitmproxy/addons/test_mapremote.py index 970682b0f..3e06457cc 100644 --- a/test/mitmproxy/addons/test_mapremote.py +++ b/test/mitmproxy/addons/test_mapremote.py @@ -42,41 +42,3 @@ class TestMapRemote: f.kill() mr.request(f) assert 
f.request.url == "https://example.org/images/test.jpg" - - -class TestMapRemoteFile: - def test_simple(self, tmpdir): - mr = mapremote.MapRemote() - with taddons.context(mr) as tctx: - tmpfile = tmpdir.join("replacement") - tmpfile.write("mitmproxy.org") - tctx.configure( - mr, - map_remote=["|example.org|@" + str(tmpfile)] - ) - f = tflow.tflow() - f.request.url = b"https://example.org/test" - mr.request(f) - assert f.request.url == "https://mitmproxy.org/test" - - @pytest.mark.asyncio - async def test_nonexistent(self, tmpdir): - mr = mapremote.MapRemote() - with taddons.context(mr) as tctx: - with pytest.raises(Exception, match="Invalid file path"): - tctx.configure( - mr, - map_remote=[":~q:example.org:@nonexistent"] - ) - - tmpfile = tmpdir.join("replacement") - tmpfile.write("mitmproxy.org") - tctx.configure( - mr, - map_remote=["|example.org|@" + str(tmpfile)] - ) - tmpfile.remove() - f = tflow.tflow() - f.request.url = b"https://example.org/test" - mr.request(f) - assert await tctx.master.await_log("could not read") diff --git a/test/mitmproxy/addons/test_modifyheaders.py b/test/mitmproxy/addons/test_modifyheaders.py index 42281322c..f3ebd279f 100644 --- a/test/mitmproxy/addons/test_modifyheaders.py +++ b/test/mitmproxy/addons/test_modifyheaders.py @@ -7,32 +7,32 @@ from mitmproxy.addons.modifyheaders import parse_modify_spec, ModifyHeaders def test_parse_modify_spec(): - spec = parse_modify_spec("/foo/bar/voing", True, False) + spec = parse_modify_spec("/foo/bar/voing", True) assert spec.matches.pattern == "foo" assert spec.subject == b"bar" assert spec.read_replacement() == b"voing" - spec = parse_modify_spec("/foo/bar/vo/ing/", False, False) + spec = parse_modify_spec("/foo/bar/vo/ing/", False) assert spec.matches.pattern == "foo" assert spec.subject == b"bar" assert spec.read_replacement() == b"vo/ing/" - spec = parse_modify_spec("/bar/voing", False, False) + spec = parse_modify_spec("/bar/voing", False) assert spec.matches(tflow.tflow()) assert 
spec.subject == b"bar" assert spec.read_replacement() == b"voing" with pytest.raises(ValueError, match="Invalid number of parameters"): - parse_modify_spec("/", False, False) + parse_modify_spec("/", False) with pytest.raises(ValueError, match="Invalid filter pattern"): - parse_modify_spec("/~b/one/two", False, False) + parse_modify_spec("/~b/one/two", False) with pytest.raises(ValueError, match="Invalid filter pattern"): - parse_modify_spec("/~b/one/two", False, False) + parse_modify_spec("/~b/one/two", False) with pytest.raises(ValueError, match="Invalid regular expression"): - parse_modify_spec("/[/two", True, False) + parse_modify_spec("/[/two", True) class TestModifyHeaders: From 7781dcb15f2c3f87563d12ca9839ced0662c42d7 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 10 Jul 2020 20:44:54 +0200 Subject: [PATCH 13/22] map local: fix mimetype detection --- mitmproxy/addons/maplocal.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index 96349fd72..1b2cda824 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -31,14 +31,6 @@ def parse_map_local_spec(option: str) -> MapLocalSpec: return MapLocalSpec(filter, regex, path) -def get_mime_type(file_path: str) -> str: - mimetype = ( - mimetypes.guess_type(file_path)[0] - or "application/octet-stream" - ) - return mimetype - - def _safe_path_join(root: Path, untrusted: str) -> Path: """Join a Path element with an untrusted str. 
@@ -127,11 +119,15 @@ class MapLocal: local_file = candidate break + headers = {} + mimetype = mimetypes.guess_type(str(local_file))[0] + if mimetype: + headers = {"Content-Type": mimetype} if local_file: flow.response = http.HTTPResponse.make( 200, local_file.read_bytes(), - {"Content-Type": get_mime_type(str(local_file))} + headers ) # only set flow.response once, for the first matching rule return From d6293004e0f66f92b49ee6e037aa768e0f7cc6f4 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 10 Jul 2020 20:45:50 +0200 Subject: [PATCH 14/22] map local: update docs --- docs/src/content/overview-features.md | 80 +++++++++++++++++---------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/docs/src/content/overview-features.md b/docs/src/content/overview-features.md index d87f99371..ceb14677c 100644 --- a/docs/src/content/overview-features.md +++ b/docs/src/content/overview-features.md @@ -51,7 +51,7 @@ define redirections of HTTP requests to local files or diretories. The local file is fetched instead of the original resource and the corresponding HTTP response is returned transparently to the client. The mime type of the local file is guessed to set the `Content-Type` header. -`map_local` patterns looks like this: +`map_local` patterns look like this: ``` |flow-filter|url-regex|file-path @@ -63,25 +63,56 @@ The mime type of the local file is guessed to set the `Content-Type` header. * **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) that defines which requests the `map_local` option applies to. -* **url-regex** is a valid Python regular expression on the request URL that defines which requests the `map_local` option applies to. +* **url-regex** is a valid Python regular expression on the request URL. It serves two purposes. +First, it must match a part of the request URL to ensure that the rule is applied to the request. +Second, it is used to split the request URL in two parts. 
The right part is used as a suffix +that is appended to the **diretory-path**, as shown in the first example below. +If **url-regex** contains a regex group, the first group is used as the suffix instead of the +right part of the split, as shown in the second example below. * **file-path** is a path to a file that is served instead of the original resource. * **diretory-path** is a path to a directory that is used to look for the resource -to serve instead of the original resource. mitmproxy tries to select the correct file -within **diretory-path** automatically. It first tries `diretory-path/url-path` and -strips the deepest directory repeatedly until it finds an existing file. -For example, with the **diretory-path** `/local` and the request URL `http://example.org/media/img/foo.jpg`, -mitmproxy looks for `/local/media/img/foo.jpg`, `/local/media/foo.jpg`, and `/local/foo.jpg`, -in this order. If no file is found, the original resource is served instead. +to serve instead of the original resource. mitmproxy tries to find the local file as +explained for **url-regex** and shown in the examples below. If the file is not +found, mitmproxy tries to append `index.html` to the resulting path. +Otherwise, a 404 response without content is sent to the client. ### Examples -Map all requests for `example.org/css/*` to the local directory `~/local-css`. +Map all requests for `example.org/css*` to the local directory `~/static-css`. + +
+                  ┌── url-regex ──┬─ directory-path ─┐
+map_local option: |example.com/css|~/static-css
+                            │
+                            │    URL is split here
+                            ▼            ▼
+HTTP Request URL: https://example.com/css/print/main.css?timestamp=123
+                                               │                ▼
+                                               ▼              query string is ignored
+Served File:      Preferred: ~/static-css/print/main.css
+                   Fallback: ~/static-css/print/main.css/index.html
+                  Otherwise: 404 response without content
+
+ +Map all `GET` requests for `example.org/index.php?page=` to the local directory `~/static-dir/`. + +
+                    flow
+                  ┌filter┬─────────── url-regex ───────────┬─ directory-path ─┐
+map_local option: |~m GET|example.com/index.php\\?page=(.+)|~/static-dir
+                            │                           │
+                            │                           │ regex group = suffix
+                            ▼                           ▼
+HTTP Request URL: https://example.com/index.php?page=aboutus
+                                                        │
+                                                        ▼
+Served File:                 Preferred: ~/static-dir/aboutus
+                              Fallback: ~/static-dir/aboutus/index.html
+                             Otherwise: 404 response without content
+
-``` -|//example.org/css/|~/local-css -``` Map all requests for `example.org/js/main.js` to the local file `~/main-local.js`. @@ -89,12 +120,6 @@ Map all requests for `example.org/js/main.js` to the local file `~/main-local.js |example.org/js/main.js|~/main-local.js ``` -Map all requests ending with `.jpg` to the local file `~/foo.jpg`. - -``` -|.*\.jpg$|~/foo.jpg -``` - ## Map Remote @@ -104,13 +129,11 @@ The substituted URL is fetched instead of the original resource and the corresponding HTTP response is returned transparently to the client. Note that if the original destination uses HTTP2, the substituted destination needs to support HTTP2 as well, otherwise the substituted request may fail. -`map_remote` patterns looks like this: +`map_remote` patterns look like this: ``` |flow-filter|url-regex|replacement -|flow-filter|url-regex|@file-path -|regex|replacement -|regex|@file-path +|url-regex|replacement ``` * **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) @@ -118,8 +141,7 @@ that defines which requests the `map_remote` option applies to. * **url-regex** is a valid Python regular expression that defines what gets replaced in the URLs of requests. -* **replacement** is a string literal that is substituted in. If the replacement string -literal starts with `@` as in `@file-path`, it is treated as a **file path** from which the replacement is read. +* **replacement** is a string literal that is substituted in. The _separator_ is arbitrary, and is defined by the first character. @@ -146,16 +168,16 @@ The `modify_body` option lets you specify an arbitrary number of patterns that define replacements within bodies of flows. 
`modify_body` patterns look like this: ``` -/flow-filter/regex/replacement -/flow-filter/regex/@file-path -/regex/replacement -/regex/@file-path +/flow-filter/body-regex/replacement +/flow-filter/body-regex/@file-path +/body-regex/replacement +/body-regex/@file-path ``` * **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) that defines which flows a replacement applies to. -* **regex** is a valid Python regular expression that defines what gets replaced. +* **body-regex** is a valid Python regular expression that defines what gets replaced. * **replacement** is a string literal that is substituted in. If the replacement string literal starts with `@` as in `@file-path`, it is treated as a **file path** from which the replacement is read. From f36dcd8915f0ac27eace2a2e066751888f21ee48 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Tue, 14 Jul 2020 15:55:31 +0200 Subject: [PATCH 15/22] revise map local docs provide a short summary at the start (with easy-to-copy examples) and then discuss the nitty-gritty details below. --- docs/src/content/overview-features.md | 62 +++++++++++++-------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/docs/src/content/overview-features.md b/docs/src/content/overview-features.md index ceb14677c..336e1aa27 100644 --- a/docs/src/content/overview-features.md +++ b/docs/src/content/overview-features.md @@ -47,43 +47,45 @@ option, to make sure the server responds with complete data. ## Map Local The `map_local` option lets you specify an arbitrary number of patterns that -define redirections of HTTP requests to local files or diretories. +define redirections of HTTP requests to local files or directories. The local file is fetched instead of the original resource -and the corresponding HTTP response is returned transparently to the client. -The mime type of the local file is guessed to set the `Content-Type` header. +and transparently returned to the client. 
+ `map_local` patterns look like this: ``` -|flow-filter|url-regex|file-path -|flow-filter|url-regex|diretory-path -|url-regex|file-path -|url-regex|diretory-path +|url-regex|local-path +|flow-filter|url-regex|local-path ``` +* **local-path** is the file or directory that should be served to the client. + +* **url-regex** is a regular expression applied on the request URL. It must match for a redirect to take place. + * **flow-filter** is an optional mitmproxy [filter expression]({{< relref "concepts-filters">}}) -that defines which requests the `map_local` option applies to. - -* **url-regex** is a valid Python regular expression on the request URL. It serves two purposes. -First, it must match a part of the request URL to ensure that the rule is applied to the request. -Second, it is used to split the request URL in two parts. The right part is used as a suffix -that is appended to the **diretory-path**, as shown in the first example below. -If **url-regex** contains a regex group, the first group is used as the suffix instead of the -right part of the split, as shown in the second example below. - -* **file-path** is a path to a file that is served instead of the original resource. - -* **diretory-path** is a path to a directory that is used to look for the resource -to serve instead of the original resource. mitmproxy tries to find the local file as -explained for **url-regex** and shown in the examples below. If the file is not -found, mitmproxy tries to append `index.html` to the resulting path. -Otherwise, a 404 response without content is sent to the client. +that additionally constrains which requests will be redirected. ### Examples -Map all requests for `example.org/css*` to the local directory `~/static-css`. +Pattern | Description +------- | ----------- +`\|example.com/main.js\|~/main-local.js` | Replace `example.com/main.js` with `~/main-local.js`. 
+`\|example.com/static\|~/static` | Replace `example.com/static/foo/bar.css` with `~/static/foo/bar.css`. +`\|example.com/static/foo\|~/static` | Replace `example.com/static/foo/bar.css` with `~/static/bar.css`. +`\|~m GET\|example.com/static\|~/static` | Replace `example.com/static/foo/bar.css` with `~/static/foo/bar.css` (but only for GET requests). + +### Details + +If *local-path* is a file, this file will always be served. File changes will be reflected immediately; there is no caching. + +If *local-path* is a directory, *url-regex* is used to split the request URL in two parts and the part on the right is appended to *local-path*, excluding the query string. +However, if *url-regex* contains a regex capturing group, this behavior changes and the first capturing group is appended instead (and query strings are not stripped). +Special characters are mapped to `_`. If the file cannot be found, `/index.html` is appended and we try again. Directory traversal outside of the originally specified directory is not possible. + +To illustrate this, consider the following example which maps all requests for `example.com/css*` to the local directory `~/static-css`. 
-                  ┌── url-regex ──┬─ directory-path ─┐
+                  ┌── url regex ──┬─ local path ─┐
 map_local option: |example.com/css|~/static-css
                             │
                             │    URL is split here
@@ -96,11 +98,12 @@ Served File:      Preferred: ~/static-css
 
-Map all `GET` requests for `example.org/index.php?page=` to the local directory `~/static-dir/`.
+If the file depends on the query string, we can use regex capturing groups. In this example, all `GET` requests for
+`example.com/index.php?page=` are mapped to `~/static-dir/`:
 
 
                     flow
-                  ┌filter┬─────────── url-regex ───────────┬─ directory-path ─┐
+                  ┌filter┬─────────── url regex ───────────┬─ local path ─┐
 map_local option: |~m GET|example.com/index.php\\?page=(.+)|~/static-dir
                             │                           │
                             │                           │ regex group = suffix
@@ -114,11 +117,6 @@ Served File:                 Preferred: ~/static-dir
 
-Map all requests for `example.org/js/main.js` to the local file `~/main-local.js`. - -``` -|example.org/js/main.js|~/main-local.js -``` ## Map Remote From 627a03c8e0696d22a796f3ca76a11ed94953719c Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 17 Jul 2020 14:56:33 +0200 Subject: [PATCH 16/22] move parse_spec to util.spec --- mitmproxy/addons/maplocal.py | 2 +- mitmproxy/addons/mapremote.py | 2 +- mitmproxy/addons/modifyheaders.py | 27 +------------------------ mitmproxy/utils/spec.py | 28 ++++++++++++++++++++++++++ test/mitmproxy/addons/test_maplocal.py | 2 +- 5 files changed, 32 insertions(+), 29 deletions(-) create mode 100644 mitmproxy/utils/spec.py diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index 1b2cda824..36c019357 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -6,7 +6,7 @@ from pathlib import Path from werkzeug.security import safe_join from mitmproxy import ctx, exceptions, flowfilter, http -from mitmproxy.addons.modifyheaders import parse_spec +from mitmproxy.utils.spec import parse_spec class MapLocalSpec(typing.NamedTuple): diff --git a/mitmproxy/addons/mapremote.py b/mitmproxy/addons/mapremote.py index 8cd522b45..23ab3025f 100644 --- a/mitmproxy/addons/mapremote.py +++ b/mitmproxy/addons/mapremote.py @@ -2,7 +2,7 @@ import re import typing from mitmproxy import ctx, exceptions, flowfilter, http -from mitmproxy.addons.modifyheaders import parse_spec +from mitmproxy.utils.spec import parse_spec class MapRemoteSpec(typing.NamedTuple): diff --git a/mitmproxy/addons/modifyheaders.py b/mitmproxy/addons/modifyheaders.py index 098ef8138..795d05b0f 100644 --- a/mitmproxy/addons/modifyheaders.py +++ b/mitmproxy/addons/modifyheaders.py @@ -5,32 +5,7 @@ from pathlib import Path from mitmproxy import ctx, exceptions, flowfilter, http from mitmproxy.net.http import Headers from mitmproxy.utils import strutils - - -def _match_all(flow) -> bool: - return True - - -def parse_spec(option: str) -> 
typing.Tuple[flowfilter.TFilter, str, str]: - """ - Parse strings in the following format: - - [/flow-filter]/subject/replacement - - """ - sep, rem = option[0], option[1:] - parts = rem.split(sep, 2) - if len(parts) == 2: - subject, replacement = parts - return _match_all, subject, replacement - elif len(parts) == 3: - patt, subject, replacement = parts - flow_filter = flowfilter.parse(patt) - if not flow_filter: - raise ValueError(f"Invalid filter pattern: {patt}") - return flow_filter, subject, replacement - else: - raise ValueError("Invalid number of parameters (2 or 3 are expected)") +from mitmproxy.utils.spec import parse_spec class ModifySpec(typing.NamedTuple): diff --git a/mitmproxy/utils/spec.py b/mitmproxy/utils/spec.py new file mode 100644 index 000000000..c38fe92e8 --- /dev/null +++ b/mitmproxy/utils/spec.py @@ -0,0 +1,28 @@ +import typing +from mitmproxy import flowfilter + + +def _match_all(flow) -> bool: + return True + + +def parse_spec(option: str) -> typing.Tuple[flowfilter.TFilter, str, str]: + """ + Parse strings in the following format: + + [/flow-filter]/subject/replacement + + """ + sep, rem = option[0], option[1:] + parts = rem.split(sep, 2) + if len(parts) == 2: + subject, replacement = parts + return _match_all, subject, replacement + elif len(parts) == 3: + patt, subject, replacement = parts + flow_filter = flowfilter.parse(patt) + if not flow_filter: + raise ValueError(f"Invalid filter pattern: {patt}") + return flow_filter, subject, replacement + else: + raise ValueError("Invalid number of parameters (2 or 3 are expected)") diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py index fa10df1aa..3ba47d9d4 100644 --- a/test/mitmproxy/addons/test_maplocal.py +++ b/test/mitmproxy/addons/test_maplocal.py @@ -3,7 +3,7 @@ from pathlib import Path import pytest from mitmproxy.addons.maplocal import MapLocal, MapLocalSpec, file_candidates -from mitmproxy.addons.modifyheaders import parse_spec +from 
mitmproxy.utils.spec import parse_spec from mitmproxy.test import taddons from mitmproxy.test import tflow From 565cbb2504f91c81eb1c27c67800d1faee64259b Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 17 Jul 2020 14:58:47 +0200 Subject: [PATCH 17/22] add server header to responses from MapLocal --- mitmproxy/addons/maplocal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index 36c019357..80dd3d6ca 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -5,7 +5,7 @@ from pathlib import Path from werkzeug.security import safe_join -from mitmproxy import ctx, exceptions, flowfilter, http +from mitmproxy import ctx, exceptions, flowfilter, http, version from mitmproxy.utils.spec import parse_spec @@ -119,7 +119,7 @@ class MapLocal: local_file = candidate break - headers = {} + headers = {"Server": version.MITMPROXY} mimetype = mimetypes.guess_type(str(local_file))[0] if mimetype: headers = {"Content-Type": mimetype} From b3c809c45ab00284a4f7043ac5c4bdb358b7e4bb Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 17 Jul 2020 17:29:36 +0200 Subject: [PATCH 18/22] map local: improve candidate generation (url dec.) 
--- mitmproxy/addons/maplocal.py | 14 ++++++++++---- test/mitmproxy/addons/test_maplocal.py | 8 ++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index 80dd3d6ca..85858d634 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -1,6 +1,7 @@ import mimetypes import re import typing +import urllib from pathlib import Path from werkzeug.security import safe_join @@ -58,14 +59,19 @@ def file_candidates(url: str, spec: MapLocalSpec) -> typing.List[Path]: else: suffix = re.split(spec.regex, url, maxsplit=1)[1] suffix = suffix.split("?")[0] # remove query string - - suffix = re.sub(r"[^0-9a-zA-Z\-_.=(),/]", "_", suffix.strip("/")) + suffix = suffix.strip("/").replace("\\", "/") if suffix: + decoded_suffix = urllib.parse.unquote(suffix) + simplified_suffix = re.sub(r"[^0-9a-zA-Z\-_.=(),/]", "_", decoded_suffix) + + suffix_candidates = [decoded_suffix, f"{decoded_suffix}/index.html"] + if decoded_suffix != simplified_suffix: + suffix_candidates.append(simplified_suffix) + suffix_candidates.append(f"{simplified_suffix}/index.html") try: return [ - _safe_path_join(spec.local_path, suffix), - _safe_path_join(spec.local_path, f"{suffix}/index.html") + _safe_path_join(spec.local_path, suff) for suff in suffix_candidates ] except ValueError: return [] diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py index 3ba47d9d4..5bfd95a44 100644 --- a/test/mitmproxy/addons/test_maplocal.py +++ b/test/mitmproxy/addons/test_maplocal.py @@ -22,6 +22,10 @@ from mitmproxy.test import tflow ("https://example.com/foo/bar.jpg?query", ":example.com/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]), ("https://example.com/foo/bar/baz.jpg", ":example.com/foo:/tmp", ["/tmp/bar/baz.jpg", "/tmp/bar/baz.jpg/index.html"]), ("https://example.com/foo/bar.jpg", ":/foo/bar.jpg:/tmp", ["/tmp/index.html"]), + ] + [ + # URL decode and special characters + 
("http://example.com/foo%20bar.jpg", ":example.com:/tmp", ["/tmp/foo bar.jpg", "/tmp/foo bar.jpg/index.html", "/tmp/foo_bar.jpg", "/tmp/foo_bar.jpg/index.html"]), + ("http://example.com/fóobår.jpg", ":example.com:/tmp", ["/tmp/fóobår.jpg", "/tmp/fóobår.jpg/index.html", "/tmp/f_ob_r.jpg", "/tmp/f_ob_r.jpg/index.html"]), ] + [ # index.html ("https://example.com/foo", ":example.com/foo:/tmp", ["/tmp/index.html"]), @@ -37,13 +41,13 @@ from mitmproxy.test import tflow ), ( "https://example/results?id=1&foo=2", ":example/(results\\?id=.+):/tmp", - ["/tmp/results_id=1_foo=2", "/tmp/results_id=1_foo=2/index.html"] + ["/tmp/results?id=1&foo=2", "/tmp/results?id=1&foo=2/index.html", "/tmp/results_id=1_foo=2", "/tmp/results_id=1_foo=2/index.html"] ), ] + [ # test directory traversal detection ("https://example.com/../../../../../../etc/passwd", ":example.com:/tmp", []), # those get already sanitized to benign versions before they reach our detection: - ("https://example.com/C:\\foo.txt", ":example.com:/tmp", ["/tmp/C__foo.txt", "/tmp/C__foo.txt/index.html"]), + ("https://example.com/C:\\foo.txt", ":example.com:/tmp", ["/tmp/C:/foo.txt", "/tmp/C:/foo.txt/index.html", "/tmp/C_/foo.txt", "/tmp/C_/foo.txt/index.html"]), ("https://example.com//etc/passwd", ":example.com:/tmp", ["/tmp/etc/passwd", "/tmp/etc/passwd/index.html"]), ] ) From ebf857a88e7c720d28d12fd1d645c2e0efb000e7 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 17 Jul 2020 17:41:41 +0200 Subject: [PATCH 19/22] map local: add warning if no candidate exists --- mitmproxy/addons/maplocal.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py index 85858d634..fc4353b30 100644 --- a/mitmproxy/addons/maplocal.py +++ b/mitmproxy/addons/maplocal.py @@ -116,11 +116,14 @@ class MapLocal: any_spec_matches = True local_file: typing.Optional[Path] = None + tested_paths = [] if spec.local_path.is_file(): local_file = spec.local_path elif 
spec.local_path.is_dir(): + tested_paths.append(spec.local_path) for candidate in file_candidates(url, spec): + tested_paths.append(candidate) if candidate.is_file(): local_file = candidate break @@ -139,3 +142,4 @@ class MapLocal: return if any_spec_matches: flow.response = http.HTTPResponse.make(404) + ctx.log.warn(f"None of the local file candidates exist: {*tested_paths,}") From 0c8a46253b8959e9e47b53b9d07dd84cce790940 Mon Sep 17 00:00:00 2001 From: Martin Plattner Date: Fri, 17 Jul 2020 17:54:46 +0200 Subject: [PATCH 20/22] map local: align ascii figures in docs --- docs/src/content/overview-features.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/src/content/overview-features.md b/docs/src/content/overview-features.md index 336e1aa27..36fa9fe84 100644 --- a/docs/src/content/overview-features.md +++ b/docs/src/content/overview-features.md @@ -87,12 +87,12 @@ To illustrate this, consider the following example which maps all requests for `
                   ┌── url regex ──┬─ local path ─┐
 map_local option: |example.com/css|~/static-css
-                            │
-                            │    URL is split here
-                            ▼            ▼
+                            │
+                            │    URL is split here
+                            ▼            ▼
 HTTP Request URL: https://example.com/css/print/main.css?timestamp=123
-                                               │                ▼
-                                               ▼              query string is ignored
+                                                 ▼
+                                               query string is ignored
 Served File:      Preferred: ~/static-css/print/main.css
                    Fallback: ~/static-css/print/main.css/index.html
                   Otherwise: 404 response without content
@@ -105,12 +105,12 @@ If the file depends on the query string, we can use regex capturing groups. In t
                     flow
                   ┌filter┬─────────── url regex ───────────┬─ local path ─┐
 map_local option: |~m GET|example.com/index.php\\?page=(.+)|~/static-dir
-                            │                           │
-                            │                           │ regex group = suffix
-                            ▼                           ▼
-HTTP Request URL: https://example.com/index.php?page=aboutus
-                                                        │
-                                                        ▼
+                           │
+                           │ regex group = suffix
+                           ▼
+HTTP Request URL: https://example.com/index.php?page=aboutus
+                                                        │
+                                                        ▼
 Served File:                 Preferred: ~/static-dir/aboutus
                               Fallback: ~/static-dir/aboutus/index.html
                              Otherwise: 404 response without content

From c98f12c4f3f48e239037a52f3c5dfef110dd797e Mon Sep 17 00:00:00 2001
From: Martin Plattner 
Date: Fri, 17 Jul 2020 18:32:27 +0200
Subject: [PATCH 21/22] map addons: fix tests

---
 mitmproxy/addons/maplocal.py                | 14 +--
 test/mitmproxy/addons/test_maplocal.py      | 95 +++++++++++++++++++--
 test/mitmproxy/addons/test_mapremote.py     |  7 +-
 test/mitmproxy/addons/test_modifybody.py    |  1 -
 test/mitmproxy/addons/test_modifyheaders.py |  9 --
 test/mitmproxy/utils/test_spec.py           | 20 +++++
 6 files changed, 117 insertions(+), 29 deletions(-)
 create mode 100644 test/mitmproxy/utils/test_spec.py

diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py
index fc4353b30..c0881c8cd 100644
--- a/mitmproxy/addons/maplocal.py
+++ b/mitmproxy/addons/maplocal.py
@@ -133,11 +133,15 @@ class MapLocal:
                 if mimetype:
                     headers = {"Content-Type": mimetype}
                 if local_file:
-                    flow.response = http.HTTPResponse.make(
-                        200,
-                        local_file.read_bytes(),
-                        headers
-                    )
+                    try:
+                        flow.response = http.HTTPResponse.make(
+                            200,
+                            local_file.read_bytes(),
+                            headers
+                        )
+                    except IOError as e:
+                        ctx.log.warn(f"Could not read file: {e}")
+                        continue
                     # only set flow.response once, for the first matching rule
                     return
         if any_spec_matches:
diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py
index 5bfd95a44..439724584 100644
--- a/test/mitmproxy/addons/test_maplocal.py
+++ b/test/mitmproxy/addons/test_maplocal.py
@@ -24,8 +24,18 @@ from mitmproxy.test import tflow
         ("https://example.com/foo/bar.jpg", ":/foo/bar.jpg:/tmp", ["/tmp/index.html"]),
     ] + [
         # URL decode and special characters
-        ("http://example.com/foo%20bar.jpg", ":example.com:/tmp", ["/tmp/foo bar.jpg", "/tmp/foo bar.jpg/index.html", "/tmp/foo_bar.jpg", "/tmp/foo_bar.jpg/index.html"]),
-        ("http://example.com/fóobår.jpg", ":example.com:/tmp", ["/tmp/fóobår.jpg", "/tmp/fóobår.jpg/index.html", "/tmp/f_ob_r.jpg", "/tmp/f_ob_r.jpg/index.html"]),
+        ("http://example.com/foo%20bar.jpg", ":example.com:/tmp", [
+            "/tmp/foo bar.jpg",
+            "/tmp/foo bar.jpg/index.html",
+            "/tmp/foo_bar.jpg",
+            "/tmp/foo_bar.jpg/index.html"
+        ]),
+        ("http://example.com/fóobår.jpg", ":example.com:/tmp", [
+            "/tmp/fóobår.jpg",
+            "/tmp/fóobår.jpg/index.html",
+            "/tmp/f_ob_r.jpg",
+            "/tmp/f_ob_r.jpg/index.html"
+        ]),
     ] + [
         # index.html
         ("https://example.com/foo", ":example.com/foo:/tmp", ["/tmp/index.html"]),
@@ -41,13 +51,23 @@ from mitmproxy.test import tflow
         ), (
                 "https://example/results?id=1&foo=2",
                 ":example/(results\\?id=.+):/tmp",
-                ["/tmp/results?id=1&foo=2", "/tmp/results?id=1&foo=2/index.html", "/tmp/results_id=1_foo=2", "/tmp/results_id=1_foo=2/index.html"]
+                [
+                    "/tmp/results?id=1&foo=2",
+                    "/tmp/results?id=1&foo=2/index.html",
+                    "/tmp/results_id=1_foo=2",
+                    "/tmp/results_id=1_foo=2/index.html"
+                ]
         ),
     ] + [
         # test directory traversal detection
         ("https://example.com/../../../../../../etc/passwd", ":example.com:/tmp", []),
         # those get already sanitized to benign versions before they reach our detection:
-        ("https://example.com/C:\\foo.txt", ":example.com:/tmp", ["/tmp/C:/foo.txt", "/tmp/C:/foo.txt/index.html", "/tmp/C_/foo.txt", "/tmp/C_/foo.txt/index.html"]),
+        ("https://example.com/C:\\foo.txt", ":example.com:/tmp", [
+            "/tmp/C:/foo.txt",
+            "/tmp/C:/foo.txt/index.html",
+            "/tmp/C_/foo.txt",
+            "/tmp/C_/foo.txt/index.html"
+        ]),
         ("https://example.com//etc/passwd", ":example.com:/tmp", ["/tmp/etc/passwd", "/tmp/etc/passwd/index.html"]),
     ]
 )
@@ -61,7 +81,17 @@ def test_file_candidates(url, spec, expected_candidates):
 
 
 class TestMapLocal:
-    def test_map_local(self, tmpdir):
+
+    def test_configure(self, tmpdir):
+        ml = MapLocal()
+        with taddons.context(ml) as tctx:
+            tctx.configure(ml, map_local=["/foo/bar/" + str(tmpdir)])
+            with pytest.raises(Exception, match="Invalid regular expression"):
+                tctx.configure(ml, map_local=["/foo/+/" + str(tmpdir)])
+            with pytest.raises(Exception, match="Invalid file path"):
+                tctx.configure(ml, map_local=["/foo/.+/three"])
+
+    def test_simple(self, tmpdir):
         ml = MapLocal()
 
         with taddons.context(ml) as tctx:
@@ -70,7 +100,7 @@ class TestMapLocal:
             tctx.configure(
                 ml,
                 map_local=[
-                    "://example.org/images:" + str(tmpdir)
+                    "|//example.org/images|" + str(tmpdir)
                 ]
             )
             f = tflow.tflow()
@@ -83,7 +113,7 @@ class TestMapLocal:
             tctx.configure(
                 ml,
                 map_local=[
-                    "://example.org:" + str(tmpdir)
+                    "|//example.org|" + str(tmpdir)
                 ]
             )
             f = tflow.tflow()
@@ -96,10 +126,59 @@ class TestMapLocal:
             tctx.configure(
                 ml,
                 map_local=[
-                    ":example.org/foo/foo/bar.jpg:" + str(tmpfile)
+                    "|example.org/foo/foo/bar.jpg|" + str(tmpfile)
                 ]
             )
             f = tflow.tflow()
             f.request.url = b"https://example.org/foo/foo/bar.jpg"
             ml.request(f)
             assert f.response.content == b"foofoobar"
+
+    @pytest.mark.asyncio
+    async def test_nonexistent_files(self, tmpdir, monkeypatch):
+        ml = MapLocal()
+
+        with taddons.context(ml) as tctx:
+            tctx.configure(
+                ml,
+                map_local=[
+                    "|example.org/css|" + str(tmpdir)
+                ]
+            )
+            f = tflow.tflow()
+            f.request.url = b"https://example.org/css/nonexistent"
+            ml.request(f)
+            assert f.response.status_code == 404
+            assert await tctx.master.await_log("None of the local file candidates exist")
+
+            tmpfile = tmpdir.join("foo.jpg")
+            tmpfile.write("foo")
+            tctx.configure(
+                ml,
+                map_local=[
+                    "|//example.org/images|" + str(tmpfile)
+                ]
+            )
+            tmpfile.remove()
+            monkeypatch.setattr(Path, "is_file", lambda x: True)
+            f = tflow.tflow()
+            f.request.url = b"https://example.org/images/foo.jpg"
+            ml.request(f)
+            assert await tctx.master.await_log("could not read file")
+
+    def test_has_reply(self, tmpdir):
+        ml = MapLocal()
+        with taddons.context(ml) as tctx:
+            tmpfile = tmpdir.join("foo.jpg")
+            tmpfile.write("foo")
+            tctx.configure(
+                ml,
+                map_local=[
+                    "|//example.org/images|" + str(tmpfile)
+                ]
+            )
+            f = tflow.tflow()
+            f.request.url = b"https://example.org/images/foo.jpg"
+            f.kill()
+            ml.request(f)
+            assert not f.response
diff --git a/test/mitmproxy/addons/test_mapremote.py b/test/mitmproxy/addons/test_mapremote.py
index 3e06457cc..06879ffa0 100644
--- a/test/mitmproxy/addons/test_mapremote.py
+++ b/test/mitmproxy/addons/test_mapremote.py
@@ -11,13 +11,8 @@ class TestMapRemote:
         mr = mapremote.MapRemote()
         with taddons.context(mr) as tctx:
             tctx.configure(mr, map_remote=["one/two/three"])
-            with pytest.raises(Exception, match="Cannot parse map_remote .* Invalid number"):
-                tctx.configure(mr, map_remote=["/"])
-            with pytest.raises(Exception, match="Cannot parse map_remote .* Invalid filter"):
-                tctx.configure(mr, map_remote=["/~b/two/three"])
-            with pytest.raises(Exception, match="Cannot parse map_remote .* Invalid regular expression"):
+            with pytest.raises(Exception, match="Invalid regular expression"):
                 tctx.configure(mr, map_remote=["/foo/+/three"])
-            tctx.configure(mr, map_remote=["/a/b/c/"])
 
     def test_simple(self):
         mr = mapremote.MapRemote()
diff --git a/test/mitmproxy/addons/test_modifybody.py b/test/mitmproxy/addons/test_modifybody.py
index f78932e52..1b25361d2 100644
--- a/test/mitmproxy/addons/test_modifybody.py
+++ b/test/mitmproxy/addons/test_modifybody.py
@@ -12,7 +12,6 @@ class TestModifyBody:
             tctx.configure(mb, modify_body=["one/two/three"])
             with pytest.raises(Exception, match="Cannot parse modify_body"):
                 tctx.configure(mb, modify_body=["/"])
-            tctx.configure(mb, modify_body=["/a/b/c/"])
 
     def test_simple(self):
         mb = modifybody.ModifyBody()
diff --git a/test/mitmproxy/addons/test_modifyheaders.py b/test/mitmproxy/addons/test_modifyheaders.py
index f3ebd279f..c358c0d2a 100644
--- a/test/mitmproxy/addons/test_modifyheaders.py
+++ b/test/mitmproxy/addons/test_modifyheaders.py
@@ -22,15 +22,6 @@ def test_parse_modify_spec():
     assert spec.subject == b"bar"
     assert spec.read_replacement() == b"voing"
 
-    with pytest.raises(ValueError, match="Invalid number of parameters"):
-        parse_modify_spec("/", False)
-
-    with pytest.raises(ValueError, match="Invalid filter pattern"):
-        parse_modify_spec("/~b/one/two", False)
-
-    with pytest.raises(ValueError, match="Invalid filter pattern"):
-        parse_modify_spec("/~b/one/two", False)
-
     with pytest.raises(ValueError, match="Invalid regular expression"):
         parse_modify_spec("/[/two", True)
 
diff --git a/test/mitmproxy/utils/test_spec.py b/test/mitmproxy/utils/test_spec.py
new file mode 100644
index 000000000..63a063563
--- /dev/null
+++ b/test/mitmproxy/utils/test_spec.py
@@ -0,0 +1,20 @@
+import pytest
+from mitmproxy.utils.spec import parse_spec
+
+
+def test_parse_spec():
+    flow_filter, subject, replacement = parse_spec("/foo/bar/voing")  # three parts: filter, subject, replacement
+    assert flow_filter.pattern == "foo"
+    assert subject == "bar"
+    assert replacement == "voing"
+    # Two parts only: no filter is given, and the returned filter must yield True (here for a dummy argument).
+    flow_filter, subject, replacement = parse_spec("/bar/voing")
+    assert flow_filter(1) is True
+    assert subject == "bar"
+    assert replacement == "voing"
+    # A lone separator contains too few parts to form a spec.
+    with pytest.raises(ValueError, match="Invalid number of parameters"):
+        parse_spec("/")
+    # NOTE(review): "~b" is presumably a filter operator that lacks its argument here — confirm.
+    with pytest.raises(ValueError, match="Invalid filter pattern"):
+        parse_spec("/~b/one/two")

From fb743c7da7dc33441d0c67f91724d15374dc5fed Mon Sep 17 00:00:00 2001
From: Maximilian Hils 
Date: Fri, 17 Jul 2020 20:25:48 +0200
Subject: [PATCH 22/22] map local: minor fixes

---
 mitmproxy/addons/maplocal.py           | 68 +++++++++++++-------------
 test/mitmproxy/addons/test_maplocal.py | 16 +++---
 2 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/mitmproxy/addons/maplocal.py b/mitmproxy/addons/maplocal.py
index c0881c8cd..e353e82ce 100644
--- a/mitmproxy/addons/maplocal.py
+++ b/mitmproxy/addons/maplocal.py
@@ -1,7 +1,7 @@
 import mimetypes
 import re
 import typing
-import urllib
+import urllib.parse
 from pathlib import Path
 
 from werkzeug.security import safe_join
@@ -59,19 +59,19 @@ def file_candidates(url: str, spec: MapLocalSpec) -> typing.List[Path]:
     else:
         suffix = re.split(spec.regex, url, maxsplit=1)[1]
         suffix = suffix.split("?")[0]  # remove query string
-        suffix = suffix.strip("/").replace("\\", "/")
+        suffix = suffix.strip("/")
 
     if suffix:
         decoded_suffix = urllib.parse.unquote(suffix)
-        simplified_suffix = re.sub(r"[^0-9a-zA-Z\-_.=(),/]", "_", decoded_suffix)
-
         suffix_candidates = [decoded_suffix, f"{decoded_suffix}/index.html"]
-        if decoded_suffix != simplified_suffix:
-            suffix_candidates.append(simplified_suffix)
-            suffix_candidates.append(f"{simplified_suffix}/index.html")
+
+        escaped_suffix = re.sub(r"[^0-9a-zA-Z\-_.=(),/]", "_", decoded_suffix)
+        if decoded_suffix != escaped_suffix:
+            suffix_candidates.extend([escaped_suffix, f"{escaped_suffix}/index.html"])
         try:
             return [
-                _safe_path_join(spec.local_path, suff) for suff in suffix_candidates
+                _safe_path_join(spec.local_path, x)
+                for x in suffix_candidates
             ]
         except ValueError:
             return []
@@ -110,40 +110,42 @@ class MapLocal:
 
         url = flow.request.pretty_url
 
-        any_spec_matches = False
+        all_candidates = []
         for spec in self.replacements:
             if spec.matches(flow) and re.search(spec.regex, url):
-                any_spec_matches = True
-
-                local_file: typing.Optional[Path] = None
-                tested_paths = []
-
                 if spec.local_path.is_file():
-                    local_file = spec.local_path
-                elif spec.local_path.is_dir():
-                    tested_paths.append(spec.local_path)
-                    for candidate in file_candidates(url, spec):
-                        tested_paths.append(candidate)
-                        if candidate.is_file():
-                            local_file = candidate
-                            break
+                    candidates = [spec.local_path]
+                else:
+                    candidates = file_candidates(url, spec)
+                all_candidates.extend(candidates)
+
+                local_file = None
+                for candidate in candidates:
+                    if candidate.is_file():
+                        local_file = candidate
+                        break
 
-                headers = {"Server": version.MITMPROXY}
-                mimetype = mimetypes.guess_type(str(local_file))[0]
-                if mimetype:
-                    headers = {"Content-Type": mimetype}
                 if local_file:
+                    headers = {
+                        "Server": version.MITMPROXY
+                    }
+                    mimetype = mimetypes.guess_type(str(local_file))[0]
+                    if mimetype:
+                        headers["Content-Type"] = mimetype
+
                     try:
-                        flow.response = http.HTTPResponse.make(
-                            200,
-                            local_file.read_bytes(),
-                            headers
-                        )
+                        contents = local_file.read_bytes()
                     except IOError as e:
                         ctx.log.warn(f"Could not read file: {e}")
                         continue
+
+                    flow.response = http.HTTPResponse.make(
+                        200,
+                        contents,
+                        headers
+                    )
                     # only set flow.response once, for the first matching rule
                     return
-        if any_spec_matches:
+        if all_candidates:
             flow.response = http.HTTPResponse.make(404)
-            ctx.log.warn(f"None of the local file candidates exist: {*tested_paths,}")
+            ctx.log.info(f"None of the local file candidates exist: {', '.join(str(x) for x in all_candidates)}")
diff --git a/test/mitmproxy/addons/test_maplocal.py b/test/mitmproxy/addons/test_maplocal.py
index 439724584..c1fe95e56 100644
--- a/test/mitmproxy/addons/test_maplocal.py
+++ b/test/mitmproxy/addons/test_maplocal.py
@@ -1,3 +1,4 @@
+import sys
 from pathlib import Path
 
 import pytest
@@ -20,7 +21,8 @@ from mitmproxy.test import tflow
         ("http://example.com/foo/bar.jpg", ":example.com/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]),
         ("https://example.com/foo/bar.jpg", ":example.com/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]),
         ("https://example.com/foo/bar.jpg?query", ":example.com/foo:/tmp", ["/tmp/bar.jpg", "/tmp/bar.jpg/index.html"]),
-        ("https://example.com/foo/bar/baz.jpg", ":example.com/foo:/tmp", ["/tmp/bar/baz.jpg", "/tmp/bar/baz.jpg/index.html"]),
+        ("https://example.com/foo/bar/baz.jpg", ":example.com/foo:/tmp",
+         ["/tmp/bar/baz.jpg", "/tmp/bar/baz.jpg/index.html"]),
         ("https://example.com/foo/bar.jpg", ":/foo/bar.jpg:/tmp", ["/tmp/index.html"]),
     ] + [
         # URL decode and special characters
@@ -61,12 +63,12 @@ from mitmproxy.test import tflow
     ] + [
         # test directory traversal detection
         ("https://example.com/../../../../../../etc/passwd", ":example.com:/tmp", []),
-        # those get already sanitized to benign versions before they reach our detection:
-        ("https://example.com/C:\\foo.txt", ":example.com:/tmp", [
-            "/tmp/C:/foo.txt",
-            "/tmp/C:/foo.txt/index.html",
-            "/tmp/C_/foo.txt",
-            "/tmp/C_/foo.txt/index.html"
+        # this is slightly hacky, but werkzeug's behavior differs per system.
+        ("https://example.com/C:\\foo.txt", ":example.com:/tmp", [] if sys.platform == "win32" else [
+            "/tmp/C:\\foo.txt",
+            "/tmp/C:\\foo.txt/index.html",
+            "/tmp/C__foo.txt",
+            "/tmp/C__foo.txt/index.html"
         ]),
         ("https://example.com//etc/passwd", ":example.com:/tmp", ["/tmp/etc/passwd", "/tmp/etc/passwd/index.html"]),
     ]