diff --git a/docs/build.py b/docs/build.py
new file mode 100755
index 000000000..67ce6c3fe
--- /dev/null
+++ b/docs/build.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+"""Build the docs: run every generator in scripts/, then render the site with hugo."""
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+here = Path(__file__).parent
+
+# Path.glob() yields entries in arbitrary order. Sort so the scripts run
+# alphabetically, as the shell glob in build.sh did: api-render.py renders the
+# generated/events.py file that api-events.py writes, so it has to run second.
+for script in sorted((here / "scripts").glob("*.py")):
+    print(f"Generating output for {script.name}...")
+    # sys.executable: run the scripts with the interpreter running this build.
+    out = subprocess.check_output([sys.executable, script.absolute()], text=True)
+    # A script that prints nothing to stdout gets no generated .html file.
+    if out:
+        (here / "src" / "generated" / f"{script.stem}.html").write_text(out, encoding="utf8")
+
+if (here / "public").exists():
+    shutil.rmtree(here / "public")
+# check=True: a failing hugo run must fail the build instead of exiting 0.
+subprocess.run(["hugo"], cwd=here / "src", check=True)
diff --git a/docs/build.sh b/docs/build.sh
deleted file mode 100755
index eaeb87907..000000000
--- a/docs/build.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-set -o errexit
-set -o pipefail
-set -o nounset
-# set -o xtrace
-
-SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-pushd ${SCRIPTPATH}
-
-for script in scripts/*.py ; do
-    output="${script##*/}"
-    output="src/generated/${output%.*}.html"
-    echo "Generating output for ${script} into ${output} ..."
-    "${script}" > "${output}"
-done
-
-rm -rf ./public
-cd src
-hugo
diff --git a/docs/ci.sh b/docs/ci.sh
index 0d920e090..92597223b 100755
--- a/docs/ci.sh
+++ b/docs/ci.sh
@@ -6,7 +6,7 @@ set -o pipefail
 
 # This script gets run from CI to render and upload docs for the master branch.
 
-./build.sh
+./build.py
 
 # Only upload if we have defined credentials - we only have these defined for
 # trusted commits (i.e. not PRs).
diff --git a/docs/modd.conf b/docs/modd.conf index 3f16cdb59..f3fd350bc 100644 --- a/docs/modd.conf +++ b/docs/modd.conf @@ -1,5 +1,5 @@ -scripts/*.py { - prep: ./build.sh +scripts/** { + prep: python3 build.py } { diff --git a/docs/scripts/api-events.py b/docs/scripts/api-events.py new file mode 100644 index 000000000..6a8b331f0 --- /dev/null +++ b/docs/scripts/api-events.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +import contextlib +import inspect +import textwrap +from pathlib import Path +from typing import List, Type + +import mitmproxy.addons.next_layer # noqa +from mitmproxy import hooks, log, addonmanager +from mitmproxy.proxy import server_hooks, layer +from mitmproxy.proxy.layers import http, tcp, tls, websocket + +known = set() + + +def category(name: str, desc: str, hooks: List[Type[hooks.Hook]]) -> None: + all_params = [ + list(inspect.signature(hook.__init__).parameters.values())[1:] + for hook in hooks + ] + + # slightly overengineered, but this was fun to write. ¯\_(ツ)_/¯ + imports = set() + types = set() + for params in all_params: + for param in params: + try: + mod = inspect.getmodule(param.annotation).__name__ + if mod == "typing": + # this is ugly, but can be removed once we are on Python 3.9+ only + imports.add(inspect.getmodule(param.annotation.__args__[0]).__name__) + types.add(param.annotation._name) + else: + imports.add(mod) + except AttributeError: + raise ValueError(f"Missing type annotation: {params}") + imports.discard("builtins") + if types: + print(f"from typing import {', '.join(sorted(types))}") + print("from mitmproxy import ctx") + for imp in sorted(imports): + print(f"import {imp}") + print() + + print(f"class {name}Events:") + print(f' """{desc}"""') + + first = True + for hook, params in zip(hooks, all_params): + if first: + first = False + else: + print() + if hook.name in known: + raise RuntimeError(f"Already documented: {hook}") + known.add(hook.name) + doc = inspect.getdoc(hook) + print(f" def {hook.name}({', 
'.join(str(p) for p in ['self'] + params)}):") + print(textwrap.indent(f'"""\n{doc}\n"""', " ")) + if params: + print(f' ctx.log(f"{hook.name}: {" ".join("{" + p.name + "=}" for p in params)}")') + else: + print(f' ctx.log("{hook.name}")') + print("") + + +outfile = Path(__file__).parent.parent / "src" / "generated" / "events.py" + +with outfile.open("w") as f, contextlib.redirect_stdout(f): + print("# This file is autogenerated, do not edit manually.") + + category( + "Lifecycle", + "", + [ + addonmanager.LoadHook, + hooks.RunningHook, + hooks.ConfigureHook, + hooks.DoneHook, + ] + ) + + category( + "Connection", + "", + [ + server_hooks.ClientConnectedHook, + server_hooks.ClientDisconnectedHook, + server_hooks.ServerConnectHook, + server_hooks.ServerConnectedHook, + server_hooks.ServerDisconnectedHook, + ] + ) + + category( + "HTTP", + "", + [ + http.HttpRequestHeadersHook, + http.HttpRequestHook, + http.HttpResponseHeadersHook, + http.HttpResponseHook, + http.HttpErrorHook, + http.HttpConnectHook, + ] + ) + + category( + "TCP", + "", + [ + tcp.TcpStartHook, + tcp.TcpMessageHook, + tcp.TcpEndHook, + tcp.TcpErrorHook, + ] + ) + + category( + "TLS", + "", + [ + tls.TlsClienthelloHook, + tls.TlsStartHook, + ] + ) + + category( + "WebSocket", + "", + [ + websocket.WebsocketStartHook, + websocket.WebsocketMessageHook, + websocket.WebsocketEndHook, + websocket.WebsocketErrorHook, + ] + ) + + category( + "AdvancedLifecycle", + "", + [ + layer.NextLayerHook, + hooks.UpdateHook, + log.AddLogHook, + ] + ) + +not_documented = set(hooks.all_hooks.keys()) - known +if not_documented: + raise RuntimeError(f"Not documented: {not_documented}") diff --git a/docs/scripts/api-render.py b/docs/scripts/api-render.py new file mode 100644 index 000000000..1991b22dc --- /dev/null +++ b/docs/scripts/api-render.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +import os +import shutil +import textwrap +from pathlib import Path + +import pdoc.render_helpers + +here = Path(__file__).parent + +if 
os.environ.get("DOCS_ARCHIVE", False): + edit_url_map = {} +else: + edit_url_map = { + "mitmproxy": "https://github.com/mitmproxy/mitmproxy/blob/master/mitmproxy/", + } + +pdoc.render.configure( + template_directory=here / "pdoc-template", + edit_url_map=edit_url_map, +) +# We can't configure Hugo, but we can configure pdoc. +pdoc.render_helpers.formatter.cssclass = "chroma" + +modules = [ + "mitmproxy.addonmanager", + "mitmproxy.certs", + "mitmproxy.connection", + "mitmproxy.coretypes.multidict", + "mitmproxy.flow", + "mitmproxy.http", + "mitmproxy.net.server_spec", + "mitmproxy.proxy.server_hooks", + "mitmproxy.tcp", + "mitmproxy.websocket", + here / ".." / "src" / "generated" / "events.py", +] + +pdoc.pdoc( + *modules, + output_directory=here / ".." / "src" / "generated" / "api" +) + +api_content = here / ".." / "src" / "content" / "api" +if api_content.exists(): + shutil.rmtree(api_content) + +api_content.mkdir() + +for module in modules: + if isinstance(module, Path): + continue + filename = f"api/{module.replace('.', '/')}.html" + (api_content / f"{module}.md").write_text(textwrap.dedent(f""" + --- + title: "{module}" + url: "{filename}" + + menu: + addons: + parent: 'Event Hooks & API' + --- + + {{{{< readfile file="/generated/{filename}" >}}}} + """)) + +(here / ".." 
/ "src" / "content" / "addons-api.md").touch() diff --git a/docs/scripts/events.py b/docs/scripts/events.py deleted file mode 100755 index 6462c6341..000000000 --- a/docs/scripts/events.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -import inspect -import textwrap -from typing import List, Type - -import mitmproxy.addons.next_layer # noqa -from mitmproxy import hooks, log, addonmanager -from mitmproxy.proxy import server_hooks, layer -from mitmproxy.proxy.layers import http, tcp, tls, websocket - -known = set() - - -def category(name: str, hooks: List[Type[hooks.Hook]]) -> None: - print(f"### {name} Events") - print("```python") - - all_params = [ - list(inspect.signature(hook.__init__).parameters.values())[1:] - for hook in hooks - ] - - # slightly overengineered, but this was fun to write. ¯\_(ツ)_/¯ - imports = set() - types = set() - for params in all_params: - for param in params: - try: - mod = inspect.getmodule(param.annotation).__name__ - if mod == "typing": - # this is ugly, but can be removed once we are on Python 3.9+ only - imports.add(inspect.getmodule(param.annotation.__args__[0]).__name__) - types.add(param.annotation._name) - else: - imports.add(mod) - except AttributeError: - raise ValueError(f"Missing type annotation: {params}") - imports.discard("builtins") - if types: - print(f"from typing import {', '.join(sorted(types))}") - print("from mitmproxy import ctx") - for imp in sorted(imports): - print(f"import {imp}") - print() - - first = True - for hook, params in zip(hooks, all_params): - if first: - first = False - else: - print() - if hook.name in known: - raise RuntimeError(f"Already documented: {hook}") - known.add(hook.name) - doc = inspect.getdoc(hook) - print(f"def {hook.name}({', '.join(str(p) for p in params)}):") - print(textwrap.indent(f'"""\n{doc}\n"""', " ")) - if params: - print(f' ctx.log(f"{hook.name}: {" ".join("{" + p.name + "=}" for p in params)}")') - else: - print(f' ctx.log("{hook.name}")') - print("```") - - 
-category( - "Lifecycle", - [ - addonmanager.LoadHook, - hooks.RunningHook, - hooks.ConfigureHook, - hooks.DoneHook, - ] -) - -category( - "Connection", - [ - server_hooks.ClientConnectedHook, - server_hooks.ClientDisconnectedHook, - server_hooks.ServerConnectHook, - server_hooks.ServerConnectedHook, - server_hooks.ServerDisconnectedHook, - ] -) - -category( - "HTTP", - [ - http.HttpRequestHeadersHook, - http.HttpRequestHook, - http.HttpResponseHeadersHook, - http.HttpResponseHook, - http.HttpErrorHook, - http.HttpConnectHook, - ] -) - -category( - "TCP", - [ - tcp.TcpStartHook, - tcp.TcpMessageHook, - tcp.TcpEndHook, - tcp.TcpErrorHook, - ] -) - -category( - "TLS", - [ - tls.TlsClienthelloHook, - tls.TlsStartHook, - ] -) - -category( - "WebSocket", - [ - websocket.WebsocketStartHook, - websocket.WebsocketMessageHook, - websocket.WebsocketEndHook, - websocket.WebsocketErrorHook, - ] -) - -category( - "Advanced Lifecycle", - [ - layer.NextLayerHook, - hooks.UpdateHook, - log.AddLogHook, - ] -) - -not_documented = set(hooks.all_hooks.keys()) - known -if not_documented: - raise RuntimeError(f"Not documented: {not_documented}") - -# print("") -# for i in flowfilter.help: -# print("" % i) -# print("
%s%s
") diff --git a/docs/scripts/examples.py b/docs/scripts/examples.py index c0209274e..566df2511 100755 --- a/docs/scripts/examples.py +++ b/docs/scripts/examples.py @@ -28,21 +28,38 @@ for example in examples: else: comment = "" overview.append( - f" * [{example.name}](#{slug}){comment}" + f" * [{example.name}](#{slug}){comment}\n" ) listings.append(f""" -

Example: {example.name}

+

Example: {example.name}

```python -{code} +{code.strip()} ``` """) -print("\n".join(overview)) -print(""" -### Community Examples + +print(f""" +# Addon Examples + +### Dedicated Example Addons + +{"".join(overview)} + +### Built-In Addons + +Much of mitmproxy’s own functionality is defined in +[a suite of built-in addons](https://github.com/mitmproxy/mitmproxy/tree/master/mitmproxy/addons), +implementing everything from functionality like anticaching and sticky cookies to our onboarding webapp. +The built-in addons make for instructive reading, and you will quickly see that quite complex functionality +can often boil down to a very small, completely self-contained module. + + +### Additional Community Examples Additional examples contributed by the mitmproxy community can be found [on GitHub](https://github.com/mitmproxy/mitmproxy/tree/master/examples/contrib). +------------------------- + +{"".join(listings)} """) -print("\n".join(listings)) diff --git a/docs/scripts/pdoc-template/frame.html.jinja2 b/docs/scripts/pdoc-template/frame.html.jinja2 new file mode 100644 index 000000000..ce2bf96d5 --- /dev/null +++ b/docs/scripts/pdoc-template/frame.html.jinja2 @@ -0,0 +1,3 @@ +{% block style %}{% endblock %} +{% block body %}{% endblock %} +
{% block attribution %}{% endblock %}
diff --git a/docs/scripts/pdoc-template/module.html.jinja2 b/docs/scripts/pdoc-template/module.html.jinja2 new file mode 100644 index 000000000..dbe3baf15 --- /dev/null +++ b/docs/scripts/pdoc-template/module.html.jinja2 @@ -0,0 +1,62 @@ +{% extends "default/module.html.jinja2" %} +{% block nav %}{% endblock %} +{% block style_layout %}{% endblock %} +{% block style_pygments %}{% endblock %} +{# +To document all event hooks, we do a bit of hackery: + 1. scripts/api-events.py auto-generates generated/events.py. + 2. scripts/api-render.py renders generated/events.py together with the remaining API docs. + 3. This template hides some elements of the default pdoc template. + +#} +{% if module.name == "events" %} + {% macro module_name() %} + {% endmacro %} + {% macro view_source(doc) %} + {% if doc.type != "module" %} + {{ default_view_source(doc) }} + {% endif %} + {% endmacro %} + {% macro is_public(doc) %} + {% if doc.name != "__init__" %} + {{ default_is_public(doc) }} + {% endif %} + {% endmacro %} +{% else %} + {% macro is_public(doc) %} + {% if doc.name is in(["from_state", "get_state", "set_state"]) %} + {% elif doc.modulename == "mitmproxy.addonmanager" %} + {% if doc.qualname.startswith("Loader") and not doc.name.startswith("_") %} + true + {% endif %} + {% elif doc.modulename == "mitmproxy.certs" %} + {% if doc.qualname == "Cert" or doc.qualname.startswith("Cert.") %} + {{ default_is_public(doc) }} + {% endif %} + {% elif doc.modulename == "mitmproxy.coretypes.multidict" %} + {% if doc.name == "_MultiDict" %} + true + {% else %} + {{ default_is_public(doc) }} + {% endif %} + {% elif doc.modulename == "mitmproxy.flow" %} + {% if doc.name is not in(["__init__", "reply", "metadata"]) %} + {{ default_is_public(doc) }} + {% endif %} + {% elif doc.modulename == "mitmproxy.http" %} + {% if doc.qualname is not in([ + "Message.__init__", "Message.data", + "Request.data", + "Response.data", + ]) %} + {{ default_is_public(doc) }} + {% endif %} + {% elif doc.modulename 
== "mitmproxy.proxy.server_hooks" %} + {% if doc.qualname.startswith("ServerConnectionHookData") and doc.name != "__init__" %} + {{ default_is_public(doc) }} + {% endif %} + {% else %} + {{ default_is_public(doc) }} + {% endif %} + {% endmacro %} +{% endif %} diff --git a/docs/src/assets/style.scss b/docs/src/assets/style.scss index e5254b541..3f39cea92 100644 --- a/docs/src/assets/style.scss +++ b/docs/src/assets/style.scss @@ -1,4 +1,12 @@ @import "./syntax"; +/* background for both hugo *and* pdoc. */ +.chroma pre, pre.chroma { + background-color: #f7f7f7; + border-top: 1px solid #ccc; + border-bottom: 1px solid #ccc; + padding: .5rem 0 .5rem .5rem; +} + @import "./badge"; $primary: #C93312; @@ -7,6 +15,9 @@ $family-sans-serif: BlinkMacSystemFont, -apple-system, "Segoe UI", "Roboto", "Ox $panel-heading-size: 1em; $panel-heading-weight: 600; +$menu-list-link-padding: .3em .75em; +$menu-label-spacing: .7em; +$menu-nested-list-margin: .3em .75em; /*!* bulma.io v0.8.0 | MIT License | github.com/jgthms/bulma */ @@ -17,6 +28,10 @@ bulma.io v0.8.0 | MIT License | github.com/jgthms/bulma */ @import "./bulma/components/_all"; @import "./bulma/layout/_all"; +html { + scroll-behavior: smooth; +} + html, body { height: 100%; } diff --git a/docs/src/assets/syntax.css b/docs/src/assets/syntax.css index 33ce367c9..23be04464 100644 --- a/docs/src/assets/syntax.css +++ b/docs/src/assets/syntax.css @@ -1,59 +1,82 @@ -/* Background */ .chroma { color: #f8f8f2; background-color: #272822 } -/* Error */ .chroma .err { color: #960050; background-color: #1e0010 } +/* Background */ .chroma { background-color: #ffffff } +/* Other */ .chroma .x { } +/* Error */ .chroma .err { color: #a61717; background-color: #e3d2d2 } /* LineTableTD */ .chroma .lntd { vertical-align: top; padding: 0; margin: 0; border: 0; } -/* LineTable */ .chroma .lntable { border-spacing: 0; padding: 0; margin: 0; border: 0; width: 100%; overflow: auto; display: block; } +/* LineTable */ .chroma .lntable { 
border-spacing: 0; padding: 0; margin: 0; border: 0; width: auto; overflow: auto; display: block; } /* LineHighlight */ .chroma .hl { display: block; width: 100%;background-color: #ffffcc } -/* LineNumbersTable */ .chroma .lnt { margin-right: 0.4em; padding: 0 0.4em 0 0.4em; display: block; } -/* LineNumbers */ .chroma .ln { margin-right: 0.4em; padding: 0 0.4em 0 0.4em; } -/* Keyword */ .chroma .k { color: #66d9ef } -/* KeywordConstant */ .chroma .kc { color: #66d9ef } -/* KeywordDeclaration */ .chroma .kd { color: #66d9ef } -/* KeywordNamespace */ .chroma .kn { color: #f92672 } -/* KeywordPseudo */ .chroma .kp { color: #66d9ef } -/* KeywordReserved */ .chroma .kr { color: #66d9ef } -/* KeywordType */ .chroma .kt { color: #66d9ef } -/* NameAttribute */ .chroma .na { color: #a6e22e } -/* NameClass */ .chroma .nc { color: #a6e22e } -/* NameConstant */ .chroma .no { color: #66d9ef } -/* NameDecorator */ .chroma .nd { color: #a6e22e } -/* NameException */ .chroma .ne { color: #a6e22e } -/* NameFunction */ .chroma .nf { color: #a6e22e } -/* NameOther */ .chroma .nx { color: #a6e22e } -/* NameTag */ .chroma .nt { color: #f92672 } -/* Literal */ .chroma .l { color: #ae81ff } -/* LiteralDate */ .chroma .ld { color: #e6db74 } -/* LiteralString */ .chroma .s { color: #e6db74 } -/* LiteralStringAffix */ .chroma .sa { color: #e6db74 } -/* LiteralStringBacktick */ .chroma .sb { color: #e6db74 } -/* LiteralStringChar */ .chroma .sc { color: #e6db74 } -/* LiteralStringDelimiter */ .chroma .dl { color: #e6db74 } -/* LiteralStringDoc */ .chroma .sd { color: #e6db74 } -/* LiteralStringDouble */ .chroma .s2 { color: #e6db74 } -/* LiteralStringEscape */ .chroma .se { color: #ae81ff } -/* LiteralStringHeredoc */ .chroma .sh { color: #e6db74 } -/* LiteralStringInterpol */ .chroma .si { color: #e6db74 } -/* LiteralStringOther */ .chroma .sx { color: #e6db74 } -/* LiteralStringRegex */ .chroma .sr { color: #e6db74 } -/* LiteralStringSingle */ .chroma .s1 { color: #e6db74 } -/* 
LiteralStringSymbol */ .chroma .ss { color: #e6db74 } -/* LiteralNumber */ .chroma .m { color: #ae81ff } -/* LiteralNumberBin */ .chroma .mb { color: #ae81ff } -/* LiteralNumberFloat */ .chroma .mf { color: #ae81ff } -/* LiteralNumberHex */ .chroma .mh { color: #ae81ff } -/* LiteralNumberInteger */ .chroma .mi { color: #ae81ff } -/* LiteralNumberIntegerLong */ .chroma .il { color: #ae81ff } -/* LiteralNumberOct */ .chroma .mo { color: #ae81ff } -/* Operator */ .chroma .o { color: #f92672 } -/* OperatorWord */ .chroma .ow { color: #f92672 } -/* Comment */ .chroma .c { color: #75715e } -/* CommentHashbang */ .chroma .ch { color: #75715e } -/* CommentMultiline */ .chroma .cm { color: #75715e } -/* CommentSingle */ .chroma .c1 { color: #75715e } -/* CommentSpecial */ .chroma .cs { color: #75715e } -/* CommentPreproc */ .chroma .cp { color: #75715e } -/* CommentPreprocFile */ .chroma .cpf { color: #75715e } -/* GenericDeleted */ .chroma .gd { color: #f92672 } -/* GenericEmph */ .chroma .ge { font-style: italic } -/* GenericInserted */ .chroma .gi { color: #a6e22e } +/* LineNumbersTable */ .chroma .lnt { margin-right: 0.4em; padding: 0 0.4em 0 0.4em;color: #7f7f7f } +/* LineNumbers */ .chroma .ln { margin-right: 0.4em; padding: 0 0.4em 0 0.4em;color: #7f7f7f } +/* Keyword */ .chroma .k { color: #000000; font-weight: bold } +/* KeywordConstant */ .chroma .kc { color: #000000; font-weight: bold } +/* KeywordDeclaration */ .chroma .kd { color: #000000; font-weight: bold } +/* KeywordNamespace */ .chroma .kn { color: #000000; font-weight: bold } +/* KeywordPseudo */ .chroma .kp { color: #000000; font-weight: bold } +/* KeywordReserved */ .chroma .kr { color: #000000; font-weight: bold } +/* KeywordType */ .chroma .kt { color: #445588; font-weight: bold } +/* Name */ .chroma .n { } +/* NameAttribute */ .chroma .na { color: #008080 } +/* NameBuiltin */ .chroma .nb { color: #0086b3 } +/* NameBuiltinPseudo */ .chroma .bp { color: #999999 } +/* NameClass */ .chroma .nc { color: 
#445588; font-weight: bold } +/* NameConstant */ .chroma .no { color: #008080 } +/* NameDecorator */ .chroma .nd { color: #3c5d5d; font-weight: bold } +/* NameEntity */ .chroma .ni { color: #800080 } +/* NameException */ .chroma .ne { color: #990000; font-weight: bold } +/* NameFunction */ .chroma .nf { color: #990000; font-weight: bold } +/* NameFunctionMagic */ .chroma .fm { } +/* NameLabel */ .chroma .nl { color: #990000; font-weight: bold } +/* NameNamespace */ .chroma .nn { color: #555555 } +/* NameOther */ .chroma .nx { } +/* NameProperty */ .chroma .py { } +/* NameTag */ .chroma .nt { color: #000080 } +/* NameVariable */ .chroma .nv { color: #008080 } +/* NameVariableClass */ .chroma .vc { color: #008080 } +/* NameVariableGlobal */ .chroma .vg { color: #008080 } +/* NameVariableInstance */ .chroma .vi { color: #008080 } +/* NameVariableMagic */ .chroma .vm { } +/* Literal */ .chroma .l { } +/* LiteralDate */ .chroma .ld { } +/* LiteralString */ .chroma .s { color: #dd1144 } +/* LiteralStringAffix */ .chroma .sa { color: #dd1144 } +/* LiteralStringBacktick */ .chroma .sb { color: #dd1144 } +/* LiteralStringChar */ .chroma .sc { color: #dd1144 } +/* LiteralStringDelimiter */ .chroma .dl { color: #dd1144 } +/* LiteralStringDoc */ .chroma .sd { color: #dd1144 } +/* LiteralStringDouble */ .chroma .s2 { color: #dd1144 } +/* LiteralStringEscape */ .chroma .se { color: #dd1144 } +/* LiteralStringHeredoc */ .chroma .sh { color: #dd1144 } +/* LiteralStringInterpol */ .chroma .si { color: #dd1144 } +/* LiteralStringOther */ .chroma .sx { color: #dd1144 } +/* LiteralStringRegex */ .chroma .sr { color: #009926 } +/* LiteralStringSingle */ .chroma .s1 { color: #dd1144 } +/* LiteralStringSymbol */ .chroma .ss { color: #990073 } +/* LiteralNumber */ .chroma .m { color: #009999 } +/* LiteralNumberBin */ .chroma .mb { color: #009999 } +/* LiteralNumberFloat */ .chroma .mf { color: #009999 } +/* LiteralNumberHex */ .chroma .mh { color: #009999 } +/* LiteralNumberInteger */ 
.chroma .mi { color: #009999 } +/* LiteralNumberIntegerLong */ .chroma .il { color: #009999 } +/* LiteralNumberOct */ .chroma .mo { color: #009999 } +/* Operator */ .chroma .o { color: #000000; font-weight: bold } +/* OperatorWord */ .chroma .ow { color: #000000; font-weight: bold } +/* Punctuation */ .chroma .p { } +/* Comment */ .chroma .c { color: #999988; font-style: italic } +/* CommentHashbang */ .chroma .ch { color: #999988; font-style: italic } +/* CommentMultiline */ .chroma .cm { color: #999988; font-style: italic } +/* CommentSingle */ .chroma .c1 { color: #999988; font-style: italic } +/* CommentSpecial */ .chroma .cs { color: #999999; font-weight: bold; font-style: italic } +/* CommentPreproc */ .chroma .cp { color: #999999; font-weight: bold; font-style: italic } +/* CommentPreprocFile */ .chroma .cpf { color: #999999; font-weight: bold; font-style: italic } +/* Generic */ .chroma .g { } +/* GenericDeleted */ .chroma .gd { color: #000000; background-color: #ffdddd } +/* GenericEmph */ .chroma .ge { color: #000000; font-style: italic } +/* GenericError */ .chroma .gr { color: #aa0000 } +/* GenericHeading */ .chroma .gh { color: #999999 } +/* GenericInserted */ .chroma .gi { color: #000000; background-color: #ddffdd } +/* GenericOutput */ .chroma .go { color: #888888 } +/* GenericPrompt */ .chroma .gp { color: #555555 } /* GenericStrong */ .chroma .gs { font-weight: bold } -/* GenericSubheading */ .chroma .gu { color: #75715e } +/* GenericSubheading */ .chroma .gu { color: #aaaaaa } +/* GenericTraceback */ .chroma .gt { color: #aa0000 } +/* GenericUnderline */ .chroma .gl { text-decoration: underline } +/* TextWhitespace */ .chroma .w { color: #bbbbbb } diff --git a/docs/src/config.toml b/docs/src/config.toml index fa6f3dc6c..0d26241bc 100644 --- a/docs/src/config.toml +++ b/docs/src/config.toml @@ -5,6 +5,7 @@ theme = "mitmproxydocs" publishDir = "../public" RelativeURLs = true pygmentsCodefences = true +pygmentsUseClasses = true [indexes] tag = "tags" 
diff --git a/docs/src/content/addons-events.md b/docs/src/content/addons-api.md similarity index 66% rename from docs/src/content/addons-events.md rename to docs/src/content/addons-api.md index eb58ddb6c..a0be2b7d2 100644 --- a/docs/src/content/addons-events.md +++ b/docs/src/content/addons-api.md @@ -1,8 +1,12 @@ --- -title: "Event Hooks" +title: "Event Hooks & API" +url: "api/events.html" +aliases: + - /addons-events/ +layout: single menu: addons: - weight: 2 + weight: 3 --- # Event Hooks @@ -16,9 +20,8 @@ header with a count of the number of responses seen: {{< example src="examples/addons/http-add-header.py" lang="py" >}} -## Supported Events +## Available Hooks -Below we list events supported by mitmproxy. We've added -annotations to illustrate the argument types. +The following addons list all available event hooks. -{{< readfile file="/generated/events.html" markdown="true" >}} +{{< readfile file="/generated/api/events.html" >}} diff --git a/docs/src/content/addons-examples.md b/docs/src/content/addons-examples.md index eaa4c7557..9ddf11541 100644 --- a/docs/src/content/addons-examples.md +++ b/docs/src/content/addons-examples.md @@ -1,11 +1,9 @@ --- -title: "Example Addons" +title: "Examples" menu: addons: weight: 6 --- -# Example Addons - {{< readfile file="/generated/examples.html" markdown="true" >}} diff --git a/docs/src/content/addons-overview.md b/docs/src/content/addons-overview.md index cc35eaa28..fe7972fdc 100644 --- a/docs/src/content/addons-overview.md +++ b/docs/src/content/addons-overview.md @@ -7,39 +7,19 @@ menu: # Addons -Mitmproxy's addon mechanism consists of a set of APIs that support components of -any complexity. Addons interact with mitmproxy by responding to **events**, -which allow them to hook into and change mitmproxy's behaviour. They are -configured through **[options]({{< relref concepts-options >}})**, which can be -set in mitmproxy's config file, changed interactively by users, or passed on the -command-line. 
Finally, they can expose **commands**, which allows users to -invoke their actions either directly or by binding them to keys in the -interactive tools. +Mitmproxy's addon mechanism is an exceptionally powerful part of mitmproxy. In fact, much of mitmproxy's own +functionality is defined in +[a suite of built-in addons](https://github.com/mitmproxy/mitmproxy/tree/master/mitmproxy/addons), +implementing everything from functionality like +[anticaching]({{< relref "overview-features#anticache" >}}) and [sticky cookies]({{< relref +"overview-features#sticky-cookies" >}}) to our onboarding webapp. -Addons are an exceptionally powerful part of mitmproxy. In fact, much of -mitmproxy's own functionality is defined in [a suite of built-in -addons](https://github.com/mitmproxy/mitmproxy/tree/master/mitmproxy/addons), -implementing everything from functionality like [anticaching]({{< relref -"overview-features#anticache" >}}) and [sticky cookies]({{< relref -"overview-features#sticky-cookies" >}}) to our onboarding webapp. The built-in -addons make for instructive reading, and you will quickly see that quite complex -functionality can often boil down to a very small, completely self-contained -modules. Mitmproxy provides the exact same set of facilities it uses for its own -functionality to third-party scripters and extenders. +Addons interact with mitmproxy by responding to [events]({{< relref addons-api >}}), which allow them to hook into and +change mitmproxy's behaviour. They are configured through [options]({{< relref addons-options >}}), which can be set in +mitmproxy's config file, changed interactively by users, or passed on the command-line. Finally, they can expose +[commands]({{< relref addons-commands >}}), which allows users to invoke their actions either directly or by binding +them to keys in the interactive tools. -This document will show you how to build addons using **events**, **options** -and **commands**. 
However, this is not an API manual, and the mitmproxy source -code remains the canonical reference. One easy way to explore the API from the -command-line is to use [pydoc](https://docs.python.org/3/library/pydoc.html). -Here, for example, is a command that shows the API documentation for the -mitmproxy's HTTP flow classes: - -```bash -pydoc mitmproxy.http -``` - -You will be referring to the mitmproxy API documentation frequently, so keep -**pydoc** or an equivalent handy. # Anatomy of an addon @@ -55,7 +35,7 @@ it into your mitmproxy tool of choice. We'll use mitmpdump in these examples, but the flag is identical for all tools: ```bash -> mitmdump -s ./anatomy.py +mitmdump -s ./anatomy.py ``` Here are a few things to note about the code above: @@ -63,12 +43,21 @@ Here are a few things to note about the code above: - Mitmproxy picks up the contents of the `addons` global list and loads what it finds into the addons mechanism. - Addons are just objects - in this case our addon is an instance of `Counter`. -- The `request` method is an example of an **event**. Addons simply implement a - method for each event they want to handle. Each event has a signature - consisting of arguments that are passed to the method. For `request`, this is - an instance of `mitmproxy.http.HTTPFlow`. +- The `request` method is an example of an *event*. Addons simply implement a + method for each event they want to handle. Each event and its signature are documented + in the [API documentation]({{< relref "addons-api" >}}). - Finally, the `ctx` module is a holdall module that exposes a set of standard objects that are commonly used in addons. We could pass a `ctx` object as the first parameter to every event, but we've found it neater to just expose it as an importable global. In this case, we're using the `ctx.log` object to do our logging. + + +# Abbreviated Scripting Syntax + +Sometimes, we would like to write a quick script without going through the trouble of creating a class. 
+The addons mechanism has a shorthand that allows a module as a whole to be treated as an addon object. +This lets us place event handler functions in the module scope. +For instance, here is a complete script that adds a header to every request: + +{{< example src="examples/addons/anatomy2.py" lang="py" >}} diff --git a/docs/src/content/addons-scripting.md b/docs/src/content/addons-scripting.md deleted file mode 100644 index 343f635c1..000000000 --- a/docs/src/content/addons-scripting.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: "Scripting" -menu: - addons: - weight: 5 ---- - -# Scripting HTTP/1.1 and HTTP/2.0 - -Sometimes, we would like to write a quick script without going through the -trouble of creating a class. The addons mechanism has a shorthand that allows a -module as a whole to be treated as an addon object. This lets us place event -handler functions in the module scope. For instance, here is a complete script -that adds a header to every request. - -{{< example src="examples/addons/scripting-minimal-example.py" lang="py" >}} - - -Here's another example that intercepts requests to a particular URL and sends -an arbitrary response instead: - -{{< example src="examples/addons/http-reply-from-proxy.py" lang="py" >}} - -All events around the HTTP protocol [can be found here]({{< relref "addons-events#http-events">}}). - -For HTTP-related objects, please look at the [http][] module, or the -[Request][], and [Response][] classes for other attributes that you can use when -scripting. - -# Scripting WebSocket - -The WebSocket protocol initially looks like a regular HTTP request, before the client and server agree to upgrade the connection to WebSocket. All scripting events for initial HTTP handshake, and also the dedicated WebSocket events [can be found here]({{< relref "addons-events#websocket-events">}}). 
- -{{< example src="examples/addons/websocket-simple.py" lang="py" >}} - -For WebSocket-related objects please look at the [websocket][] module to find -all attributes that you can use when scripting. - -[websocket]: https://github.com/mitmproxy/mitmproxy/blob/master/mitmproxy/websocket.py - - -# Scripting TCP - -All events around the TCP protocol [can be found here]({{< relref "addons-events#tcp-events">}}). - -{{< example src="examples/addons/tcp-simple.py" lang="py" >}} - -For WebSocket-related objects please look at the [tcp][] module to find -all attributes that you can use when scripting. - -[tcp]: https://github.com/mitmproxy/mitmproxy/blob/master/mitmproxy/tcp.py diff --git a/docs/src/content/api/mitmproxy.addonmanager.md b/docs/src/content/api/mitmproxy.addonmanager.md new file mode 100644 index 000000000..fb8bd181b --- /dev/null +++ b/docs/src/content/api/mitmproxy.addonmanager.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.addonmanager" +url: "api/mitmproxy/addonmanager.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/addonmanager.html" >}} diff --git a/docs/src/content/api/mitmproxy.certs.md b/docs/src/content/api/mitmproxy.certs.md new file mode 100644 index 000000000..6a3d3d468 --- /dev/null +++ b/docs/src/content/api/mitmproxy.certs.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.certs" +url: "api/mitmproxy/certs.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/certs.html" >}} diff --git a/docs/src/content/api/mitmproxy.connection.md b/docs/src/content/api/mitmproxy.connection.md new file mode 100644 index 000000000..5c48cdbd7 --- /dev/null +++ b/docs/src/content/api/mitmproxy.connection.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.connection" +url: "api/mitmproxy/connection.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/connection.html" >}} diff --git 
a/docs/src/content/api/mitmproxy.coretypes.multidict.md b/docs/src/content/api/mitmproxy.coretypes.multidict.md new file mode 100644 index 000000000..88eb79a51 --- /dev/null +++ b/docs/src/content/api/mitmproxy.coretypes.multidict.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.coretypes.multidict" +url: "api/mitmproxy/coretypes/multidict.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/coretypes/multidict.html" >}} diff --git a/docs/src/content/api/mitmproxy.flow.md b/docs/src/content/api/mitmproxy.flow.md new file mode 100644 index 000000000..e07394db1 --- /dev/null +++ b/docs/src/content/api/mitmproxy.flow.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.flow" +url: "api/mitmproxy/flow.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/flow.html" >}} diff --git a/docs/src/content/api/mitmproxy.http.md b/docs/src/content/api/mitmproxy.http.md new file mode 100644 index 000000000..780252096 --- /dev/null +++ b/docs/src/content/api/mitmproxy.http.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.http" +url: "api/mitmproxy/http.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/http.html" >}} diff --git a/docs/src/content/api/mitmproxy.net.server_spec.md b/docs/src/content/api/mitmproxy.net.server_spec.md new file mode 100644 index 000000000..5fe2274c9 --- /dev/null +++ b/docs/src/content/api/mitmproxy.net.server_spec.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.net.server_spec" +url: "api/mitmproxy/net/server_spec.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/net/server_spec.html" >}} diff --git a/docs/src/content/api/mitmproxy.proxy.server_hooks.md b/docs/src/content/api/mitmproxy.proxy.server_hooks.md new file mode 100644 index 000000000..bc8ac8e50 --- /dev/null +++ b/docs/src/content/api/mitmproxy.proxy.server_hooks.md @@ -0,0 +1,11 @@ + +--- 
+title: "mitmproxy.proxy.server_hooks" +url: "api/mitmproxy/proxy/server_hooks.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/proxy/server_hooks.html" >}} diff --git a/docs/src/content/api/mitmproxy.tcp.md b/docs/src/content/api/mitmproxy.tcp.md new file mode 100644 index 000000000..fea35b00e --- /dev/null +++ b/docs/src/content/api/mitmproxy.tcp.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.tcp" +url: "api/mitmproxy/tcp.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/tcp.html" >}} diff --git a/docs/src/content/api/mitmproxy.websocket.md b/docs/src/content/api/mitmproxy.websocket.md new file mode 100644 index 000000000..03661320f --- /dev/null +++ b/docs/src/content/api/mitmproxy.websocket.md @@ -0,0 +1,11 @@ + +--- +title: "mitmproxy.websocket" +url: "api/mitmproxy/websocket.html" + +menu: + addons: + parent: 'Event Hooks & API' +--- + +{{< readfile file="/generated/api/mitmproxy/websocket.html" >}} diff --git a/docs/src/layouts/partials/edit-on-github.html b/docs/src/layouts/partials/edit-on-github.html index d2c3098c2..a5de09108 100644 --- a/docs/src/layouts/partials/edit-on-github.html +++ b/docs/src/layouts/partials/edit-on-github.html @@ -1,4 +1,4 @@ -{{ if and .IsPage (not (getenv "DOCS_ARCHIVE")) }} +{{ if and .IsPage (ne .Type "api") (not (getenv "DOCS_ARCHIVE")) }} {{ end }} - diff --git a/docs/src/layouts/partials/sidemenu.html b/docs/src/layouts/partials/sidemenu.html index 035cc59e8..919abf3cf 100644 --- a/docs/src/layouts/partials/sidemenu.html +++ b/docs/src/layouts/partials/sidemenu.html @@ -3,9 +3,19 @@ {{ $currentPage := .ctx }} {{ $menuname := .menuname }} {{ range $menu.ByWeight }} -
  • +
  • {{ .Name }} + href="{{.URL}}">{{ .Name }} + {{ if and .HasChildren (or ($currentPage.IsMenuCurrent $menuname .) ($currentPage.HasMenuCurrent $menuname .)) }} + + {{ end }}
  • {{end}} - \ No newline at end of file + diff --git a/examples/addons/scripting-minimal-example.py b/examples/addons/anatomy2.py similarity index 53% rename from examples/addons/scripting-minimal-example.py rename to examples/addons/anatomy2.py index 9ecb02931..09814c486 100644 --- a/examples/addons/scripting-minimal-example.py +++ b/examples/addons/anatomy2.py @@ -1,2 +1,5 @@ +"""An addon using the abbreviated scripting syntax.""" + + def request(flow): flow.request.headers["myheader"] = "value" diff --git a/examples/addons/http-trailers.py b/examples/addons/http-trailers.py index 72470de0c..35dc30a86 100644 --- a/examples/addons/http-trailers.py +++ b/examples/addons/http-trailers.py @@ -36,7 +36,6 @@ def request(flow: http.HTTPFlow): def response(flow: http.HTTPFlow): - assert flow.response # make type checker happy if flow.response.trailers: print("HTTP Trailers detected! Response contains:", flow.response.trailers) diff --git a/examples/addons/internet-in-mirror.py b/examples/addons/internet-in-mirror.py new file mode 100644 index 000000000..0c274903e --- /dev/null +++ b/examples/addons/internet-in-mirror.py @@ -0,0 +1,14 @@ +""" +Mirror all web pages. + +Useful if you are living down under. +""" +from mitmproxy import http + + +def response(flow: http.HTTPFlow) -> None: + if flow.response and flow.response.content: + flow.response.content = flow.response.content.replace( + b"", + b"" + ) diff --git a/examples/addons/internet_in_mirror.py b/examples/addons/internet_in_mirror.py deleted file mode 100644 index 8d33cea90..000000000 --- a/examples/addons/internet_in_mirror.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Mirror all web pages. - -Useful if you are living down under. 
-""" -from mitmproxy import http - - -def response(flow: http.HTTPFlow) -> None: - assert flow.response # make type checker happy - reflector = b"" - flow.response.content = flow.response.content.replace(b"", reflector) diff --git a/examples/addons/websocket-inject-message.py b/examples/addons/websocket-inject-message.py deleted file mode 100644 index 3999be3b6..000000000 --- a/examples/addons/websocket-inject-message.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -Inject a WebSocket message into a running connection. - -This example shows how to inject a WebSocket message to the client. -Every new WebSocket connection will trigger a new asyncio task that -periodically injects a new message to the client. -""" -import asyncio -import mitmproxy.websocket - - -class InjectWebSocketMessage: - async def inject(self, flow: mitmproxy.websocket.WebSocketFlow): - i = 0 - while not flow.ended and not flow.error: - await asyncio.sleep(5) - flow.inject_message(flow.client_conn, f'This is the #{i} injected message!') - i += 1 - - def websocket_start(self, flow): - asyncio.get_event_loop().create_task(self.inject(flow)) - - -addons = [InjectWebSocketMessage()] diff --git a/mitmproxy/addonmanager.py b/mitmproxy/addonmanager.py index 801a8e90c..a0def682c 100644 --- a/mitmproxy/addonmanager.py +++ b/mitmproxy/addonmanager.py @@ -98,6 +98,11 @@ class Loader: ) def add_command(self, path: str, func: typing.Callable) -> None: + """Add a command to mitmproxy. + + Unless you are generating commands programmatically, + this API should be avoided. Decorate your function with `@mitmproxy.command.command` instead.
+ """ self.master.commands.add(path, func) diff --git a/mitmproxy/addons/dumper.py b/mitmproxy/addons/dumper.py index 95807cba0..c47c32dcc 100644 --- a/mitmproxy/addons/dumper.py +++ b/mitmproxy/addons/dumper.py @@ -80,11 +80,11 @@ class Dumper: def _echo_headers(self, headers: http.Headers): for k, v in headers.fields: - k = strutils.bytes_to_escaped_str(k) - v = strutils.bytes_to_escaped_str(v) + ks = strutils.bytes_to_escaped_str(k) + vs = strutils.bytes_to_escaped_str(v) out = "{}: {}".format( - click.style(k, fg="blue"), - click.style(v) + click.style(ks, fg="blue"), + click.style(vs) ) self.echo(out, ident=4) diff --git a/mitmproxy/certs.py b/mitmproxy/certs.py index dd1adb617..afe1bca3b 100644 --- a/mitmproxy/certs.py +++ b/mitmproxy/certs.py @@ -39,6 +39,7 @@ rD693XKIHUCWOjMh1if6omGXKHH40QuME2gNa50+YPn1iYDl88uDbbMCAQI= class Cert(serializable.Serializable): + """Representation of a (TLS) certificate.""" _cert: x509.Certificate def __init__(self, cert: x509.Certificate): diff --git a/mitmproxy/connection.py b/mitmproxy/connection.py index 50c92f5c9..ad862cc43 100644 --- a/mitmproxy/connection.py +++ b/mitmproxy/connection.py @@ -29,7 +29,7 @@ class Connection(serializable.Serializable, metaclass=ABCMeta): Base class for client and server connections. The connection object only exposes metadata about the connection, but not the underlying socket object. - This is intentional, all I/O should be handled by mitmproxy.proxy.server exclusively. + This is intentional, all I/O should be handled by `mitmproxy.proxy.server` exclusively. """ # all connections have a unique id. While # f.client_conn == f2.client_conn already holds true for live flows (where we have object identity), @@ -92,12 +92,12 @@ class Connection(serializable.Serializable, metaclass=ABCMeta): @property def connected(self) -> bool: - """`True` if Connection.state is ConnectionState.OPEN, `False` otherwise. 
Read-only.""" + """*Read-only:* `True` if Connection.state is ConnectionState.OPEN, `False` otherwise.""" return self.state is ConnectionState.OPEN @property def tls_established(self) -> bool: - """`True` if TLS has been established, `False` otherwise. Read-only.""" + """*Read-only:* `True` if TLS has been established, `False` otherwise.""" return self.timestamp_tls_setup is not None def __eq__(self, other): @@ -143,7 +143,7 @@ class Client(Connection): timestamp_start: float """*Timestamp:* TCP SYN received""" - def __init__(self, peername, sockname, timestamp_start): + def __init__(self, peername: Address, sockname: Address, timestamp_start: float): self.id = str(uuid.uuid4()) self.peername = peername self.sockname = sockname diff --git a/mitmproxy/coretypes/multidict.py b/mitmproxy/coretypes/multidict.py index 336dd910d..c1ac9f9a4 100644 --- a/mitmproxy/coretypes/multidict.py +++ b/mitmproxy/coretypes/multidict.py @@ -1,10 +1,26 @@ -from abc import ABCMeta, abstractmethod +from abc import ABCMeta +from abc import abstractmethod +from typing import Iterator +from typing import List +from typing import MutableMapping +from typing import Sequence +from typing import Tuple +from typing import TypeVar -from collections.abc import MutableMapping from mitmproxy.coretypes import serializable +KT = TypeVar('KT') +VT = TypeVar('VT') + + +class _MultiDict(MutableMapping[KT, VT], metaclass=ABCMeta): + """ + A MultiDict is a dictionary-like data structure that supports multiple values per key. + """ + + fields: Tuple[Tuple[KT, VT], ...] + """The underlying raw datastructure.""" -class _MultiDict(MutableMapping, metaclass=ABCMeta): def __repr__(self): fields = ( repr(field) @@ -17,7 +33,7 @@ class _MultiDict(MutableMapping, metaclass=ABCMeta): @staticmethod @abstractmethod - def _reduce_values(values): + def _reduce_values(values: Sequence[VT]) -> VT: """ If a user accesses multidict["foo"], this method reduces all values for "foo" to a single value that is returned. 
@@ -27,22 +43,22 @@ class _MultiDict(MutableMapping, metaclass=ABCMeta): @staticmethod @abstractmethod - def _kconv(key): + def _kconv(key: KT) -> KT: """ This method converts a key to its canonical representation. For example, HTTP headers are case-insensitive, so this method returns key.lower(). """ - def __getitem__(self, key): + def __getitem__(self, key: KT) -> VT: values = self.get_all(key) if not values: raise KeyError(key) return self._reduce_values(values) - def __setitem__(self, key, value): + def __setitem__(self, key: KT, value: VT) -> None: self.set_all(key, [value]) - def __delitem__(self, key): + def __delitem__(self, key: KT) -> None: if key not in self: raise KeyError(key) key = self._kconv(key) @@ -51,7 +67,7 @@ class _MultiDict(MutableMapping, metaclass=ABCMeta): if key != self._kconv(field[0]) ) - def __iter__(self): + def __iter__(self) -> Iterator[KT]: seen = set() for key, _ in self.fields: key_kconv = self._kconv(key) @@ -59,15 +75,15 @@ class _MultiDict(MutableMapping, metaclass=ABCMeta): seen.add(key_kconv) yield key - def __len__(self): + def __len__(self) -> int: return len({self._kconv(key) for key, _ in self.fields}) - def __eq__(self, other): + def __eq__(self, other) -> bool: if isinstance(other, MultiDict): return self.fields == other.fields return False - def get_all(self, key): + def get_all(self, key: KT) -> List[VT]: """ Return the list of all values for a given key. If that key is not in the MultiDict, the return value will be an empty list. @@ -79,13 +95,13 @@ class _MultiDict(MutableMapping, metaclass=ABCMeta): if self._kconv(k) == key ] - def set_all(self, key, values): + def set_all(self, key: KT, values: List[VT]) -> None: """ Remove the old values for a key and add new ones. 
""" key_kconv = self._kconv(key) - new_fields = [] + new_fields: List[Tuple[KT, VT]] = [] for field in self.fields: if self._kconv(field[0]) == key_kconv: if values: @@ -100,55 +116,49 @@ class _MultiDict(MutableMapping, metaclass=ABCMeta): ) self.fields = tuple(new_fields) - def add(self, key, value): + def add(self, key: KT, value: VT) -> None: """ Add an additional value for the given key at the bottom. """ self.insert(len(self.fields), key, value) - def insert(self, index, key, value): + def insert(self, index: int, key: KT, value: VT) -> None: """ Insert an additional value for the given key at the specified position. """ item = (key, value) self.fields = self.fields[:index] + (item,) + self.fields[index:] - def keys(self, multi=False): + def keys(self, multi: bool = False): """ Get all keys. - Args: - multi(bool): - If True, one key per value will be returned. - If False, duplicate keys will only be returned once. + If `multi` is True, one key per value will be returned. + If `multi` is False, duplicate keys will only be returned once. """ return ( k for k, _ in self.items(multi) ) - def values(self, multi=False): + def values(self, multi: bool = False): """ Get all values. - Args: - multi(bool): - If True, all values will be returned. - If False, only the first value per key will be returned. + If `multi` is True, all values will be returned. + If `multi` is False, only the first value per key will be returned. """ return ( v for _, v in self.items(multi) ) - def items(self, multi=False): + def items(self, multi: bool = False): """ Get all (key, value) tuples. - Args: - multi(bool): - If True, all (key, value) pairs will be returned - If False, only the first (key, value) pair per unique key will be returned. + If `multi` is True, all `(key, value)` pairs will be returned. + If False, only one tuple per key is returned. 
""" if multi: return self.fields @@ -156,7 +166,9 @@ class _MultiDict(MutableMapping, metaclass=ABCMeta): return super().items() -class MultiDict(_MultiDict, serializable.Serializable): +class MultiDict(_MultiDict[KT, VT], serializable.Serializable): + """A concrete MultiDict, storing its own data.""" + def __init__(self, fields=()): super().__init__() self.fields = tuple( @@ -182,12 +194,13 @@ class MultiDict(_MultiDict, serializable.Serializable): return cls(state) -class MultiDictView(_MultiDict): +class MultiDictView(_MultiDict[KT, VT]): """ The MultiDictView provides the MultiDict interface over calculated data. The view itself contains no state - data is retrieved from the parent on request, and stored back to the parent on change. """ + def __init__(self, getter, setter): self._getter = getter self._setter = setter @@ -204,7 +217,7 @@ class MultiDictView(_MultiDict): # multiple elements exist with the same key. return values[0] - @property + @property # type: ignore def fields(self): return self._getter() @@ -212,5 +225,5 @@ class MultiDictView(_MultiDict): def fields(self, value): self._setter(value) - def copy(self): + def copy(self) -> "MultiDict[KT,VT]": return MultiDict(self.fields) diff --git a/mitmproxy/flow.py b/mitmproxy/flow.py index bf2278950..d33f637fa 100644 --- a/mitmproxy/flow.py +++ b/mitmproxy/flow.py @@ -10,21 +10,21 @@ from mitmproxy import version class Error(stateobject.StateObject): """ - An Error. + An Error. - This is distinct from an protocol error response (say, a HTTP code 500), - which is represented by a normal `mitmproxy.http.Response` object. This class is - responsible for indicating errors that fall outside of normal protocol - communications, like interrupted connections, timeouts, protocol errors. + This is distinct from an protocol error response (say, a HTTP code 500), + which is represented by a normal `mitmproxy.http.Response` object. 
This class is + responsible for indicating errors that fall outside of normal protocol + communications, like interrupted connections, timeouts, or protocol errors. """ msg: str """Message describing the error.""" timestamp: float - """Unix timestamp""" + """Unix timestamp of when this error happened.""" - KILLED_MESSAGE = "Connection killed." + KILLED_MESSAGE: typing.ClassVar[str] = "Connection killed." def __init__(self, msg: str, timestamp: typing.Optional[float] = None) -> None: """Create an error. If no timestamp is passed, the current time is used.""" @@ -53,8 +53,46 @@ class Error(stateobject.StateObject): class Flow(stateobject.StateObject): """ - A Flow is a collection of objects representing a single transaction. - This class is usually subclassed for each protocol, e.g. HTTPFlow. + Base class for network flows. A flow is a collection of objects, + for example HTTP request/response pairs or a list of TCP messages. + + See also: + - mitmproxy.http.HTTPFlow + - mitmproxy.tcp.TCPFlow + """ + client_conn: connection.Client + """The client that connected to mitmproxy.""" + + server_conn: connection.Server + """ + The server mitmproxy connected to. + + Some flows may never cause mitmproxy to initiate a server connection, + for example because their response is replayed by mitmproxy itself. + To simplify implementation, those flows will still have a `server_conn` attribute + with a `timestamp_start` set to `None`. + """ + + error: typing.Optional[Error] = None + """A connection or protocol error affecting this flow.""" + + intercepted: bool + """ + If `True`, the flow is currently paused by mitmproxy. + We're waiting for a user action to forward the flow to its destination. + """ + + marked: bool + """ + If `True`, this flow has been marked by the user. + """ + + is_replay: typing.Optional[str] + """ + This attribute indicates if this flow has been replayed in either direction. 
+ + - a value of `request` indicates that the request has been artificially replayed by mitmproxy to the server. + - a value of `response` indicates that the response to the client's request has been set by server replay. """ def __init__( @@ -70,7 +108,6 @@ class Flow(stateobject.StateObject): self.server_conn = server_conn self.live = live - self.error: typing.Optional[Error] = None self.intercepted: bool = False self._backup: typing.Optional[Flow] = None self.reply: typing.Optional[controller.Reply] = None @@ -111,6 +148,7 @@ class Flow(stateobject.StateObject): return f def copy(self): + """Make a copy of this flow.""" f = super().copy() f.live = False if self.reply is not None: @@ -119,7 +157,7 @@ class Flow(stateobject.StateObject): def modified(self): """ - Has this Flow been modified? + `True` if this flow has been modified by a user, `False` otherwise. """ if self._backup: return self._backup != self.get_state() @@ -128,8 +166,7 @@ class Flow(stateobject.StateObject): def backup(self, force=False): """ - Save a backup of this Flow, which can be reverted to using a - call to .revert(). + Save a backup of this flow, which can be restored by calling `Flow.revert()`. """ if not self._backup: self._backup = self.get_state() @@ -144,6 +181,7 @@ class Flow(stateobject.StateObject): @property def killable(self): + """*Read-only:* `True` if this flow can be killed, `False` otherwise.""" return ( self.reply and self.reply.state in {"start", "taken"} and @@ -152,7 +190,7 @@ class Flow(stateobject.StateObject): def kill(self): """ - Kill this request. + Kill this flow. The current request/response will not be forwarded to its destination. """ if not self.killable: raise exceptions.ControlException("Flow is not killable.") @@ -162,8 +200,8 @@ class Flow(stateobject.StateObject): def intercept(self): """ - Intercept this Flow. Processing will stop until resume is - called. + Intercept this Flow. Processing will stop until resume is + called.
""" if self.intercepted: return @@ -172,7 +210,7 @@ class Flow(stateobject.StateObject): def resume(self): """ - Continue with the flow - called after an intercept(). + Continue with the flow – called after an intercept(). """ if not self.intercepted: return @@ -183,5 +221,15 @@ class Flow(stateobject.StateObject): @property def timestamp_start(self) -> float: - """Start time of the flow.""" + """ + *Read-only:* Start time of the flow. + Depending on the flow type, this property is an alias for + `mitmproxy.connection.Client.timestamp_start` or `mitmproxy.http.Request.timestamp_start`. + """ return self.client_conn.timestamp_start + + +__all__ = [ + "Flow", + "Error", +] diff --git a/mitmproxy/http.py b/mitmproxy/http.py index 098eee6f0..1893a9ef5 100644 --- a/mitmproxy/http.py +++ b/mitmproxy/http.py @@ -3,23 +3,30 @@ import time import urllib.parse from dataclasses import dataclass from dataclasses import fields -from email.utils import formatdate, mktime_tz, parsedate_tz -from typing import Callable, cast +from email.utils import formatdate +from email.utils import mktime_tz +from email.utils import parsedate_tz +from typing import Callable from typing import Dict from typing import Iterable +from typing import Iterator +from typing import List from typing import Mapping from typing import Optional from typing import Tuple from typing import Union +from typing import cast -from mitmproxy import flow, connection +from mitmproxy import flow from mitmproxy.coretypes import multidict from mitmproxy.coretypes import serializable from mitmproxy.net import encoding -from mitmproxy.net.http import cookies, multipart +from mitmproxy.net.http import cookies +from mitmproxy.net.http import multipart from mitmproxy.net.http import status_codes from mitmproxy.net.http import url -from mitmproxy.net.http.headers import assemble_content_type, parse_content_type +from mitmproxy.net.http.headers import assemble_content_type +from mitmproxy.net.http.headers import 
parse_content_type from mitmproxy.utils import human from mitmproxy.utils import strutils from mitmproxy.utils import typecheck @@ -28,72 +35,70 @@ from mitmproxy.utils.strutils import always_str # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. -def _native(x): +def _native(x: bytes) -> str: return x.decode("utf-8", "surrogateescape") -def _always_bytes(x): +def _always_bytes(x: Union[str, bytes]) -> bytes: return strutils.always_bytes(x, "utf-8", "surrogateescape") -class Headers(multidict.MultiDict): +# This cannot be easily typed with mypy yet, so we just specify MultiDict without concrete types. +class Headers(multidict.MultiDict): # type: ignore """ Header class which allows both convenient access to individual headers as well as direct access to the underlying raw data. Provides a full dictionary interface. - Example: + Create headers with keyword arguments: + >>> h = Headers(host="example.com", content_type="application/xml") - .. code-block:: python + Headers mostly behave like a normal dict: + >>> h["Host"] + "example.com" - # Create headers with keyword arguments - >>> h = Headers(host="example.com", content_type="application/xml") + Headers are case insensitive: + >>> h["host"] + "example.com" - # Headers mostly behave like a normal dict. 
- >>> h["Host"] - "example.com" + Headers can also be created from a list of raw (header_name, header_value) byte tuples: + >>> h = Headers([ + (b"Host",b"example.com"), + (b"Accept",b"text/html"), + (b"accept",b"application/xml") + ]) - # HTTP Headers are case insensitive - >>> h["host"] - "example.com" + Multiple headers are folded into a single header as per RFC 7230: + >>> h["Accept"] + "text/html, application/xml" - # Headers can also be created from a list of raw (header_name, header_value) byte tuples - >>> h = Headers([ - (b"Host",b"example.com"), - (b"Accept",b"text/html"), - (b"accept",b"application/xml") - ]) + Setting a header removes all existing headers with the same name: + >>> h["Accept"] = "application/text" + >>> h["Accept"] + "application/text" - # Multiple headers are folded into a single header as per RFC7230 - >>> h["Accept"] - "text/html, application/xml" + `bytes(h)` returns an HTTP/1 header block: + >>> print(bytes(h)) + Host: example.com + Accept: application/text - # Setting a header removes all existing headers with the same name. - >>> h["Accept"] = "application/text" - >>> h["Accept"] - "application/text" - - # bytes(h) returns a HTTP1 header block. - >>> print(bytes(h)) - Host: example.com - Accept: application/text - - # For full control, the raw header fields can be accessed - >>> h.fields + For full control, the raw header fields can be accessed: + >>> h.fields Caveats: - For use with the "Set-Cookie" header, see :py:meth:`get_all`. + - For use with the "Set-Cookie" header, either use `Response.cookies` or see `Headers.get_all`. """ - def __init__(self, fields=(), **headers): + def __init__(self, fields: Iterable[Tuple[bytes, bytes]] = (), **headers): """ - Args: - fields: (optional) list of ``(name, value)`` header byte tuples, - e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes. - **headers: Additional headers to set. Will overwrite existing values from `fields`. 
- For convenience, underscores in header names will be transformed to dashes - - this behaviour does not extend to other methods. - If ``**headers`` contains multiple keys that have equal ``.lower()`` s, - the behavior is undefined. + *Args:* + - *fields:* (optional) list of ``(name, value)`` header byte tuples, + e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes. + - *\*\*headers:* Additional headers to set. Will overwrite existing values from `fields`. + For convenience, underscores in header names will be transformed to dashes - + this behaviour does not extend to other methods. + + If ``**headers`` contains multiple keys that have equal ``.lower()`` representations, + the behavior is undefined. """ super().__init__(fields) @@ -102,41 +107,43 @@ class Headers(multidict.MultiDict): raise TypeError("Header fields must be bytes.") # content_type -> content-type - headers = { + self.update({ _always_bytes(name).replace(b"_", b"-"): _always_bytes(value) for name, value in headers.items() - } - self.update(headers) + }) + + fields: Tuple[Tuple[bytes, bytes], ...] @staticmethod - def _reduce_values(values): + def _reduce_values(values) -> str: # Headers can be folded return ", ".join(values) @staticmethod - def _kconv(key): + def _kconv(key) -> str: # Headers are case-insensitive return key.lower() - def __bytes__(self): + def __bytes__(self) -> bytes: if self.fields: return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n" else: return b"" - def __delitem__(self, key): + def __delitem__(self, key: Union[str, bytes]) -> None: key = _always_bytes(key) super().__delitem__(key) - def __iter__(self): + def __iter__(self) -> Iterator[str]: for x in super().__iter__(): yield _native(x) - def get_all(self, name): + def get_all(self, name: Union[str, bytes]) -> List[str]: """ - Like :py:meth:`get`, but does not fold multiple headers into a single one. + Like `Headers.get`, but does not fold multiple headers into a single one. 
This is useful for Set-Cookie headers, which do not support folding. - See also: https://tools.ietf.org/html/rfc7230#section-3.2.2 + + *See also:* https://tools.ietf.org/html/rfc7230#section-3.2.2 """ name = _always_bytes(name) return [ @@ -144,16 +151,16 @@ class Headers(multidict.MultiDict): super().get_all(name) ] - def set_all(self, name, values): + def set_all(self, name: Union[str, bytes], values: List[Union[str, bytes]]): """ Explicitly set multiple headers for the given key. - See: :py:meth:`get_all` + See `Headers.get_all`. """ name = _always_bytes(name) values = [_always_bytes(x) for x in values] return super().set_all(name, values) - def insert(self, index, key, value): + def insert(self, index: int, key: Union[str, bytes], value: Union[str, bytes]): key = _always_bytes(key) value = _always_bytes(value) super().insert(index, key, value) @@ -222,6 +229,8 @@ class ResponseData(MessageData): class Message(serializable.Serializable): + """Base class for `Request` and `Response`.""" + @classmethod def from_state(cls, state): return cls(**state) @@ -233,12 +242,21 @@ class Message(serializable.Serializable): self.data.set_state(state) data: MessageData - stream: Union[Callable, bool] = False + stream: Union[Callable[[bytes], bytes], bool] = False + """ + If `True`, the message body will not be buffered on the proxy + but immediately streamed to the destination instead. + Alternatively, a transformation function can be specified, but please note + that packet boundaries should not be relied upon. + + This attribute must be set in the `requestheaders` or `responseheaders` hook. + Setting it in `request` or `response` is already too late, mitmproxy has buffered the message body already. + """ @property def http_version(self) -> str: """ - Version string, e.g. "HTTP/1.1" + HTTP version string, for example `HTTP/1.1`. """ return self.data.http_version.decode("utf-8", "surrogateescape") @@ -272,7 +290,7 @@ class Message(serializable.Serializable): @property def trailers(self) -> Optional[Headers]: """ - The HTTP trailers.
+ The [HTTP trailers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Trailer). """ return self.data.trailers @@ -283,9 +301,11 @@ class Message(serializable.Serializable): @property def raw_content(self) -> Optional[bytes]: """ - The raw (potentially compressed) HTTP message body as bytes. + The raw (potentially compressed) HTTP message body. - See also: :py:attr:`content`, :py:class:`text` + In contrast to `Message.content` and `Message.text`, accessing this property never raises. + + *See also:* `Message.content`, `Message.text` """ return self.data.content @@ -293,31 +313,35 @@ class Message(serializable.Serializable): def raw_content(self, content: Optional[bytes]) -> None: self.data.content = content - def get_content(self, strict: bool = True) -> Optional[bytes]: + @property + def content(self) -> Optional[bytes]: """ The uncompressed HTTP message body as bytes. - Raises: - ValueError, when the HTTP content-encoding is invalid and strict is True. + Accessing this attribute may raise a `ValueError` when the HTTP content-encoding is invalid. - See also: :py:class:`raw_content`, :py:attr:`text` + *See also:* `Message.raw_content`, `Message.text` """ - if self.raw_content is None: - return None - ce = self.headers.get("content-encoding") - if ce: - try: - content = encoding.decode(self.raw_content, ce) - # A client may illegally specify a byte -> str encoding here (e.g. utf8) - if isinstance(content, str): - raise ValueError(f"Invalid Content-Encoding: {ce}") - return content - except ValueError: - if strict: - raise - return self.raw_content - else: - return self.raw_content + return self.get_content() + + @content.setter + def content(self, value: Optional[bytes]) -> None: + self.set_content(value) + + @property + def text(self) -> Optional[str]: + """ + The uncompressed and decoded HTTP message body as text. + + Accessing this attribute may raise a `ValueError` when either content-encoding or charset is invalid. 
+ + *See also:* `Message.raw_content`, `Message.content` + """ + return self.get_text() + + @text.setter + def text(self, value: Optional[str]) -> None: + self.set_text(value) def set_content(self, value: Optional[bytes]) -> None: if value is None: @@ -338,29 +362,27 @@ class Message(serializable.Serializable): self.raw_content = value self.headers["content-length"] = str(len(self.raw_content)) - content = property(get_content, set_content) - - @property - def timestamp_start(self) -> float: + def get_content(self, strict: bool = True) -> Optional[bytes]: """ - First byte timestamp + Similar to `Message.content`, but does not raise if `strict` is `False`. + Instead, the compressed message body is returned as-is. """ - return self.data.timestamp_start - - @timestamp_start.setter - def timestamp_start(self, timestamp_start: float) -> None: - self.data.timestamp_start = timestamp_start - - @property - def timestamp_end(self) -> Optional[float]: - """ - Last byte timestamp - """ - return self.data.timestamp_end - - @timestamp_end.setter - def timestamp_end(self, timestamp_end: Optional[float]): - self.data.timestamp_end = timestamp_end + if self.raw_content is None: + return None + ce = self.headers.get("content-encoding") + if ce: + try: + content = encoding.decode(self.raw_content, ce) + # A client may illegally specify a byte -> str encoding here (e.g. 
utf8) + if isinstance(content, str): + raise ValueError(f"Invalid Content-Encoding: {ce}") + return content + except ValueError: + if strict: + raise + return self.raw_content + else: + return self.raw_content def _get_content_type_charset(self) -> Optional[str]: ct = parse_content_type(self.headers.get("content-type", "")) @@ -391,14 +413,26 @@ class Message(serializable.Serializable): return enc + def set_text(self, text: Optional[str]) -> None: + if text is None: + self.content = None + return + enc = self._guess_encoding() + + try: + self.content = cast(bytes, encoding.encode(text, enc)) + except ValueError: + # Fall back to UTF-8 and update the content-type header. + ct = parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {}) + ct[2]["charset"] = "utf-8" + self.headers["content-type"] = assemble_content_type(*ct) + enc = "utf8" + self.content = text.encode(enc, "surrogateescape") + def get_text(self, strict: bool = True) -> Optional[str]: """ - The uncompressed and decoded HTTP message body as text. - - Raises: - ValueError, when either content-encoding or charset is invalid and strict is True. - - See also: :py:attr:`content`, :py:class:`raw_content` + Similar to `Message.text`, but does not raise if `strict` is `False`. + Instead, the message body is returned as surrogate-escaped UTF-8. """ content = self.get_content(strict) if content is None: @@ -411,23 +445,27 @@ class Message(serializable.Serializable): raise return content.decode("utf8", "surrogateescape") - def set_text(self, text: Optional[str]) -> None: - if text is None: - self.content = None - return - enc = self._guess_encoding() + @property + def timestamp_start(self) -> float: + """ + *Timestamp:* Headers received. + """ + return self.data.timestamp_start - try: - self.content = encoding.encode(text, enc) - except ValueError: - # Fall back to UTF-8 and update the content-type header. 
- ct = parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {}) - ct[2]["charset"] = "utf-8" - self.headers["content-type"] = assemble_content_type(*ct) - enc = "utf8" - self.content = text.encode(enc, "surrogateescape") + @timestamp_start.setter + def timestamp_start(self, timestamp_start: float) -> None: + self.data.timestamp_start = timestamp_start - text = property(get_text, set_text) + @property + def timestamp_end(self) -> Optional[float]: + """ + *Timestamp:* Last byte received. + """ + return self.data.timestamp_end + + @timestamp_end.setter + def timestamp_end(self, timestamp_end: Optional[float]): + self.data.timestamp_end = timestamp_end def decode(self, strict: bool = True) -> None: """ @@ -435,26 +473,25 @@ class Message(serializable.Serializable): removes the header. If there is no Content-Encoding header, no action is taken. - Raises: - ValueError, when the content-encoding is invalid and strict is True. + *Raises:* + - `ValueError`, when the content-encoding is invalid and strict is True. """ decoded = self.get_content(strict) self.headers.pop("content-encoding", None) self.content = decoded - def encode(self, e: str) -> None: + def encode(self, encoding: str) -> None: """ - Encodes body with the encoding e, where e is "gzip", "deflate", "identity", "br", or "zstd". - Any existing content-encodings are overwritten, - the content is not decoded beforehand. + Encodes body with the given encoding, which must be "gzip", "deflate", "identity", "br", or "zstd". + Any existing content-encodings are overwritten, the content is not decoded beforehand. - Raises: - ValueError, when the specified content-encoding is invalid. + *Raises:* + - `ValueError`, when the specified content-encoding is invalid.
""" - self.headers["content-encoding"] = e + self.headers["content-encoding"] = encoding self.content = self.raw_content if "content-encoding" not in self.headers: - raise ValueError("Invalid content encoding {}".format(repr(e))) + raise ValueError("Invalid content encoding {}".format(repr(encoding))) class Request(Message): @@ -474,7 +511,7 @@ class Request(Message): http_version: bytes, headers: Union[Headers, Tuple[Tuple[bytes, bytes], ...]], content: Optional[bytes], - trailers: Union[None, Headers, Tuple[Tuple[bytes, bytes], ...]], + trailers: Union[Headers, Tuple[Tuple[bytes, bytes], ...], None], timestamp_start: float, timestamp_end: Optional[float], ): @@ -543,7 +580,7 @@ class Request(Message): for k, v in headers.items() ) elif isinstance(headers, Iterable): - headers = Headers(headers) + headers = Headers(headers) # type: ignore else: raise TypeError("Expected headers to be an iterable or dict, but is {}.".format( type(headers).__name__ @@ -578,7 +615,7 @@ class Request(Message): @property def first_line_format(self) -> str: """ - HTTP request form as defined in `RFC7230 `_. + *Read-only:* HTTP request form as defined in [RFC 7230](https://tools.ietf.org/html/rfc7230#section-5.3). origin-form and asterisk-form are subsumed as "relative". """ @@ -617,9 +654,12 @@ class Request(Message): HTTP request authority. For HTTP/1, this is the authority portion of the request target - (in either absolute-form or authority-form) + (in either absolute-form or authority-form). + For origin-form and asterisk-form requests, this property is set to an empty string. For HTTP/2, this is the :authority pseudo header. + + *See also:* `Request.host`, `Request.host_header`, `Request.pretty_host` """ try: return self.data.authority.decode("idna") @@ -638,11 +678,13 @@ class Request(Message): @property def host(self) -> str: """ - Target host. This may be parsed from the raw request + Target server for this request. This may be parsed from the raw request (e.g. 
from a ``GET http://example.com/ HTTP/1.1`` request line) or inferred from the proxy mode (e.g. an IP in transparent mode). Setting the host attribute also updates the host header and authority information, if present. + + *See also:* `Request.authority`, `Request.host_header`, `Request.pretty_host` """ return self.data.host @@ -664,6 +706,8 @@ class Request(Message): This property maps to either ``request.headers["Host"]`` or ``request.authority``, depending on whether it's HTTP/1.x or HTTP/2.0. + + *See also:* `Request.authority`, `Request.host`, `Request.pretty_host` """ if self.is_http2: return self.authority or self.data.headers.get("Host", None) @@ -686,7 +730,7 @@ class Request(Message): @property def port(self) -> int: """ - Target port + Target port. """ return self.data.port @@ -709,7 +753,9 @@ class Request(Message): @property def url(self) -> str: """ - The URL string, constructed from the request's URL components. + The full URL string, constructed from `Request.scheme`, `Request.host`, `Request.port` and `Request.path`. + + Setting this property updates these attributes as well. """ if self.first_line_format == "authority": return f"{self.host}:{self.port}" @@ -723,9 +769,11 @@ class Request(Message): @property def pretty_host(self) -> str: """ - Similar to :py:attr:`host`, but using the host/:authority header as an additional (preferred) data source. - This is useful in transparent mode where :py:attr:`host` is only an IP address, - but may not reflect the actual destination as the Host header could be spoofed. + *Read-only:* Like `Request.host`, but using `Request.host_header` header as an additional (preferred) data source. + This is useful in transparent mode where `Request.host` is only an IP address. + + *Warning:* When working in adversarial environments, this may not reflect the actual destination + as the Host header could be spoofed.
""" authority = self.host_header if authority: @@ -736,7 +784,7 @@ class Request(Message): @property def pretty_url(self) -> str: """ - Like :py:attr:`url`, but using :py:attr:`pretty_host` instead of :py:attr:`host`. + *Read-only:* Like `Request.url`, but using `Request.pretty_host` instead of `Request.host`. """ if self.first_line_format == "authority": return self.authority @@ -760,9 +808,11 @@ class Request(Message): self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment]) @property - def query(self) -> multidict.MultiDictView: + def query(self) -> multidict.MultiDictView[str, str]: """ - The request query string as an :py:class:`~mitmproxy.net.multidict.MultiDictView` object. + The request query as a mutable mapping view on the request's path. + For the most part, this behaves like a dictionary. + Modifications to the MultiDictView update `Request.path`, and vice versa. """ return multidict.MultiDictView( self._get_query, @@ -781,11 +831,11 @@ class Request(Message): self.headers["cookie"] = cookies.format_cookie_header(value) @property - def cookies(self) -> multidict.MultiDictView: + def cookies(self) -> multidict.MultiDictView[str, str]: """ The request cookies. - - An empty :py:class:`~mitmproxy.net.multidict.MultiDictView` object if the cookie monster ate them all. + For the most part, this behaves like a dictionary. + Modifications to the MultiDictView update `Request.headers`, and vice versa. """ return multidict.MultiDictView( self._get_cookies, @@ -797,7 +847,7 @@ class Request(Message): self._set_cookies(value) @property - def path_components(self): + def path_components(self) -> Tuple[str, ...]: """ The URL's path components as a tuple of strings. Components are unquoted. 
@@ -809,7 +859,7 @@ class Request(Message): return tuple(url.unquote(i) for i in path.split("/") if i) @path_components.setter - def path_components(self, components): + def path_components(self, components: Iterable[str]): components = map(lambda x: url.quote(x, safe=""), components) path = "/" + "/".join(components) _, _, _, params, query, fragment = urllib.parse.urlparse(self.url) @@ -817,27 +867,24 @@ class Request(Message): def anticache(self) -> None: """ - Modifies this request to remove headers that might produce a cached - response. That is, we remove ETags and If-Modified-Since headers. + Modifies this request to remove headers that might produce a cached response. """ - delheaders = [ + delheaders = ( "if-modified-since", "if-none-match", - ] + ) for i in delheaders: self.headers.pop(i, None) def anticomp(self) -> None: """ - Modifies this request to remove headers that will compress the - resource's data. + Modify the Accept-Encoding header to only accept uncompressed responses. """ self.headers["accept-encoding"] = "identity" def constrain_encoding(self) -> None: """ - Limits the permissible Accept-Encoding values, based on what we can - decode appropriately. + Limits the permissible Accept-Encoding values, based on what we can decode appropriately. """ accept_encoding = self.headers.get("accept-encoding") if accept_encoding: @@ -864,13 +911,14 @@ class Request(Message): self.content = url.encode(form_data, self.get_text(strict=False)).encode() @property - def urlencoded_form(self): + def urlencoded_form(self) -> multidict.MultiDictView[str, str]: """ - The URL-encoded form data as an :py:class:`~mitmproxy.net.multidict.MultiDictView` object. - An empty multidict.MultiDictView if the content-type indicates non-form data - or the content could not be parsed. + The URL-encoded form data. - Starting with mitmproxy 1.0, key and value are strings. 
+ If the content-type indicates non-form data or the form could not be parsed, this is set to + an empty `MultiDictView`. + + Modifications to the MultiDictView update `Request.content`, and vice versa. """ return multidict.MultiDictView( self._get_urlencoded_form, @@ -895,13 +943,14 @@ class Request(Message): self.headers["content-type"] = "multipart/form-data" @property - def multipart_form(self): + def multipart_form(self) -> multidict.MultiDictView[bytes, bytes]: """ - The multipart form data as an :py:class:`~mitmproxy.net.multidict.MultiDictView` object. - An empty multidict.MultiDictView if the content-type indicates non-form data - or the content could not be parsed. + The multipart form data. - Key and value are bytes. + If the content-type indicates non-form data or the form could not be parsed, this is set to + an empty `MultiDictView`. + + Modifications to the MultiDictView update `Request.content`, and vice versa. """ return multidict.MultiDictView( self._get_multipart_form, @@ -977,12 +1026,12 @@ class Response(Message): headers = headers elif isinstance(headers, dict): headers = Headers( - (always_bytes(k, "utf-8", "surrogateescape"), + (always_bytes(k, "utf-8", "surrogateescape"), # type: ignore always_bytes(v, "utf-8", "surrogateescape")) for k, v in headers.items() ) elif isinstance(headers, Iterable): - headers = Headers(headers) + headers = Headers(headers) # type: ignore else: raise TypeError("Expected headers to be an iterable or dict, but is {}.".format( type(headers).__name__ @@ -1023,7 +1072,8 @@ class Response(Message): @property def reason(self) -> str: """ - HTTP Reason Phrase, e.g. "Not Found". + HTTP reason phrase, for example "Not Found". + HTTP/2 responses do not contain a reason phrase, an empty string will be returned instead. 
""" # Encoding: http://stackoverflow.com/a/16674906/934719 @@ -1049,16 +1099,15 @@ class Response(Message): self.headers.set_all("set-cookie", cookie_headers) @property - def cookies(self) -> multidict.MultiDictView: + def cookies(self) -> multidict.MultiDictView[str, Tuple[str, multidict.MultiDict[str, Optional[str]]]]: """ - The response cookies. A possibly empty - :py:class:`~mitmproxy.net.multidict.MultiDictView`, where the keys are cookie - name strings, and values are (value, attr) tuples. Value is a string, - and attr is an MultiDictView containing cookie attributes. Within - attrs, unary attributes (e.g. HTTPOnly) are indicated by a Null value. + The response cookies. A possibly empty `MultiDictView`, where the keys are cookie + name strings, and values are `(cookie value, attributes)` tuples. Within + attributes, unary attributes (e.g. `HTTPOnly`) are indicated by a `None` value. + Modifications to the MultiDictView update `Response.headers`, and vice versa. - Caveats: - Updating the attr + *Warning:* Changes to `attributes` will not be picked up unless you also reassign + the `(cookie value, attributes)` tuple directly in the `MultiDictView`. """ return multidict.MultiDictView( self._get_cookies, @@ -1074,8 +1123,8 @@ class Response(Message): This fairly complex and heuristic function refreshes a server response for replay. - - It adjusts date, expires and last-modified headers. - - It adjusts cookie expiration. + - It adjusts date, expires, and last-modified headers. + - It adjusts cookie expiration. """ if not now: now = time.time() @@ -1108,19 +1157,17 @@ class HTTPFlow(flow.Flow): transaction. """ request: Request + """The client's HTTP request.""" response: Optional[Response] = None + """The server's HTTP response.""" error: Optional[flow.Error] = None """ + A connection or protocol error affecting this flow. + Note that it's possible for a Flow to have both a response and an error object. 
This might happen, for instance, when a response was received from the server, but there was an error sending it back to the client. """ - server_conn: connection.Server - client_conn: connection.Client - intercepted: bool = False - """ Is this flow currently being intercepted? """ - mode: str - """ What mode was the proxy layer in when receiving this request? """ def __init__(self, client_conn, server_conn, live=None, mode="regular"): super().__init__("http", client_conn, server_conn, live) @@ -1144,6 +1191,7 @@ class HTTPFlow(flow.Flow): @property def timestamp_start(self) -> float: + """*Read-only:* An alias for `Request.timestamp_start`.""" return self.request.timestamp_start def copy(self): diff --git a/mitmproxy/net/server_spec.py b/mitmproxy/net/server_spec.py index 75c0942cb..f117774f5 100644 --- a/mitmproxy/net/server_spec.py +++ b/mitmproxy/net/server_spec.py @@ -1,5 +1,5 @@ """ -Parse scheme, host and port from a string. +Server specs are used to describe an upstream proxy or server. """ import functools import re @@ -31,12 +31,12 @@ def parse(server_spec: str) -> ServerSpec: """ Parses a server mode specification, e.g.: - - http://example.com/ - - example.org - - example.com:443 + - http://example.com/ + - example.org + - example.com:443 - Raises: - ValueError, if the server specification is invalid. + *Raises:* + - ValueError, if the server specification is invalid. """ m = server_spec_re.match(server_spec) if not m: @@ -71,13 +71,10 @@ def parse(server_spec: str) -> ServerSpec: def parse_with_mode(mode: str) -> Tuple[str, ServerSpec]: """ - Parse a proxy mode specification, which is usually just (reverse|upstream):server-spec + Parse a proxy mode specification, which is usually just `(reverse|upstream):server-spec`. - Returns: - A (mode, server_spec) tuple. - - Raises: - ValueError, if the specification is invalid. + *Raises:* + - ValueError, if the specification is invalid. 
""" mode, server_spec = mode.split(":", maxsplit=1) return mode, parse(server_spec) diff --git a/mitmproxy/proxy/server_hooks.py b/mitmproxy/proxy/server_hooks.py index 592beb6ce..7afba799f 100644 --- a/mitmproxy/proxy/server_hooks.py +++ b/mitmproxy/proxy/server_hooks.py @@ -26,8 +26,12 @@ class ClientDisconnectedHook(commands.StartHook): @dataclass class ServerConnectionHookData: + """Event data for server connection event hooks.""" + server: connection.Server + """The server connection this hook is about.""" client: connection.Client + """The client on the other end.""" @dataclass diff --git a/mitmproxy/tcp.py b/mitmproxy/tcp.py index 264e46b39..ec3c78b67 100644 --- a/mitmproxy/tcp.py +++ b/mitmproxy/tcp.py @@ -1,5 +1,4 @@ import time - from typing import List from mitmproxy import flow @@ -7,6 +6,12 @@ from mitmproxy.coretypes import serializable class TCPMessage(serializable.Serializable): + """ + An individual TCP "message". + Note that TCP is *stream-based* and not *message-based*. + For practical purposes the stream is chunked into messages here, + but you should not rely on message boundaries. + """ def __init__(self, from_client, content, timestamp=None): self.from_client = from_client @@ -31,7 +36,6 @@ class TCPMessage(serializable.Serializable): class TCPFlow(flow.Flow): - """ A TCPFlow is a simplified representation of a TCP session. """ @@ -45,3 +49,9 @@ class TCPFlow(flow.Flow): def __repr__(self): return "".format(len(self.messages)) + + +__all__ = [ + "TCPFlow", + "TCPMessage", +] diff --git a/mitmproxy/websocket.py b/mitmproxy/websocket.py index 5fe715c83..e7137c0d8 100644 --- a/mitmproxy/websocket.py +++ b/mitmproxy/websocket.py @@ -1,35 +1,54 @@ -import time +""" +*Deprecation Notice:* Mitmproxy's WebSocket API is going to change soon, +see . 
+""" import queue +import time import warnings -from typing import List, Optional, Union +from typing import List +from typing import Optional +from typing import Union + +from mitmproxy import flow +from mitmproxy.coretypes import serializable +from mitmproxy.net import websocket +from mitmproxy.utils import human +from mitmproxy.utils import strutils from wsproto.frame_protocol import CloseReason from wsproto.frame_protocol import Opcode -from mitmproxy import flow -from mitmproxy.net import websocket -from mitmproxy.coretypes import serializable -from mitmproxy.utils import strutils, human - class WebSocketMessage(serializable.Serializable): """ A WebSocket message sent from one endpoint to the other. """ + type: Opcode + """indicates either TEXT or BINARY (from wsproto.frame_protocol.Opcode).""" + from_client: bool + """True if this message was sent by the client.""" + content: Union[bytes, str] + """A byte-string representing the content of this message.""" + timestamp: float + """Timestamp of when this message was received or created.""" + + killed: bool + """True if this message was killed and should not be sent to the other endpoint.""" + def __init__( - self, type: int, from_client: bool, content: Union[bytes, str], timestamp: Optional[float]=None, killed: bool=False + self, + type: int, + from_client: bool, + content: Union[bytes, str], + timestamp: Optional[float] = None, + killed: bool = False ) -> None: self.type = Opcode(type) # type: ignore - """indicates either TEXT or BINARY (from wsproto.frame_protocol.Opcode).""" self.from_client = from_client - """True if this messages was sent by the client.""" self.content = content - """A byte-string representing the content of this message.""" self.timestamp: float = timestamp or time.time() - """Timestamp of when this message was received or created.""" self.killed = killed - """True if this messages was killed and should not be sent to the other endpoint.""" @classmethod def from_state(cls, state): @@
-147,25 +166,3 @@ class WebSocketFlow(flow.Flow): direction="->" if message.from_client else "<-", endpoint=self.handshake_flow.request.path, ) - - def inject_message(self, endpoint, payload): - """ - Inject and send a full WebSocket message to the remote endpoint. - This might corrupt your WebSocket connection! Be careful! - - The endpoint needs to be either flow.client_conn or flow.server_conn. - - If ``payload`` is of type ``bytes`` then the message is flagged as - being binary If it is of type ``str`` encoded as UTF-8 and sent as - text. - - :param payload: The message body to send. - :type payload: ``bytes`` or ``str`` - """ - - if endpoint == self.client_conn: - self._inject_messages_client.put(payload) - elif endpoint == self.server_conn: - self._inject_messages_server.put(payload) - else: - raise ValueError('Invalid endpoint') diff --git a/setup.cfg b/setup.cfg index 5dfd81d4d..0550512e8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,6 +36,10 @@ ignore_errors = True [mypy-test.*] ignore_errors = True +# https://github.com/python/mypy/issues/3004 +[mypy-http-modify-form,http-trailers] +ignore_errors = True + [tool:full_coverage] exclude = mitmproxy/tools/ diff --git a/setup.py b/setup.py index 6fa019616..467a29ba2 100644 --- a/setup.py +++ b/setup.py @@ -99,6 +99,7 @@ setup( 'dev': [ "hypothesis>=5.8,<6.2", "parver>=0.1,<2.0", + "pdoc>=4.0.0", "pytest-asyncio>=0.10.0,<0.14,!=0.14", "pytest-cov>=2.7.1,<3", "pytest-timeout>=1.3.3,<2", diff --git a/test/examples/test_examples.py b/test/examples/test_examples.py index ec4ca517c..32211fe88 100644 --- a/test/examples/test_examples.py +++ b/test/examples/test_examples.py @@ -10,7 +10,7 @@ from ..mitmproxy import tservers class TestScripts(tservers.MasterTest): def test_add_header(self, tdata): with taddons.context() as tctx: - a = tctx.script(tdata.path("../examples/addons/scripting-minimal-example.py")) + a = tctx.script(tdata.path("../examples/addons/anatomy2.py")) f = tflow.tflow() a.request(f) assert 
f.request.headers["myheader"] == "value" diff --git a/test/mitmproxy/test_http.py b/test/mitmproxy/test_http.py index e60229c1f..2d1cbb0ba 100644 --- a/test/mitmproxy/test_http.py +++ b/test/mitmproxy/test_http.py @@ -751,7 +751,7 @@ class TestHeaders: headers = Headers() assert len(headers) == 0 - headers = Headers([[b"Host", b"example.com"]]) + headers = Headers([(b"Host", b"example.com")]) assert len(headers) == 1 assert headers["Host"] == "example.com" @@ -760,14 +760,14 @@ class TestHeaders: assert headers["Host"] == "example.com" headers = Headers( - [[b"Host", b"invalid"]], + [(b"Host", b"invalid")], Host="example.com" ) assert len(headers) == 1 assert headers["Host"] == "example.com" headers = Headers( - [[b"Host", b"invalid"], [b"Accept", b"text/plain"]], + [(b"Host", b"invalid"), (b"Accept", b"text/plain")], Host="example.com" ) assert len(headers) == 2 @@ -775,7 +775,7 @@ class TestHeaders: assert headers["Accept"] == "text/plain" with pytest.raises(TypeError): - Headers([[b"Host", "not-bytes"]]) + Headers([(b"Host", "not-bytes")]) def test_set(self): headers = Headers() @@ -791,8 +791,8 @@ class TestHeaders: assert bytes(headers) == b"Host: example.com\r\n" headers = Headers([ - [b"Host", b"example.com"], - [b"Accept", b"text/plain"] + (b"Host", b"example.com"), + (b"Accept", b"text/plain") ]) assert bytes(headers) == b"Host: example.com\r\nAccept: text/plain\r\n" @@ -801,8 +801,8 @@ class TestHeaders: def test_iter(self): headers = Headers([ - [b"Set-Cookie", b"foo"], - [b"Set-Cookie", b"bar"] + (b"Set-Cookie", b"foo"), + (b"Set-Cookie", b"bar") ]) assert list(headers) == ["Set-Cookie"] @@ -816,9 +816,9 @@ class TestHeaders: def test_items(self): headers = Headers([ - [b"Set-Cookie", b"foo"], - [b"Set-Cookie", b"bar"], - [b'Accept', b'text/plain'], + (b"Set-Cookie", b"foo"), + (b"Set-Cookie", b"bar"), + (b'Accept', b'text/plain'), ]) assert list(headers.items()) == [ ('Set-Cookie', 'foo, bar'), diff --git a/test/mitmproxy/test_websocket.py 
b/test/mitmproxy/test_websocket.py index a9481e664..1ff98962e 100644 --- a/test/mitmproxy/test_websocket.py +++ b/test/mitmproxy/test_websocket.py @@ -86,15 +86,3 @@ class TestWebSocketFlow: d = tflow.twebsocketflow().handshake_flow.get_state() tnetstring.dump(d, b) assert b.getvalue() - - def test_inject_message(self): - f = tflow.twebsocketflow() - - with pytest.raises(ValueError): - f.inject_message(None, 'foobar') - - f.inject_message(f.client_conn, 'foobar') - assert f._inject_messages_client.qsize() == 1 - - f.inject_message(f.server_conn, 'foobar') - assert f._inject_messages_client.qsize() == 1