From fdb6a44245249a50b5c95cdf0d8d13ecddfe5726 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Wed, 12 Oct 2016 10:57:05 +1300 Subject: [PATCH 01/12] docs: cleanups improvements and fighting sphinx - Hide links to internal code listings, and link to github instead - Improve formatting of code/example captions - Fix outdated documentation of command-line options - Complete documentation of all events + improved formatting - tcp_open -> tcp_start, tcp_close -> tcp_end to reduce confusion --- docs/_static/theme_overrides.css | 29 ++++ docs/conf.py | 45 +++++- docs/dev/models.rst | 81 ---------- docs/features/clientreplay.rst | 2 +- docs/features/responsestreaming.rst | 6 +- docs/features/serverreplay.rst | 2 +- docs/index.rst | 7 +- docs/scripting/api.rst | 8 + docs/scripting/context.rst | 4 + docs/scripting/events.rst | 202 +++++++++++++++++++++++++ docs/scripting/inlinescripts.rst | 227 ---------------------------- docs/scripting/mitmproxy.rst | 26 ---- docs/scripting/overview.rst | 79 ++++++++++ mitmproxy/builtins/filestreamer.py | 4 +- mitmproxy/controller.py | 4 +- mitmproxy/flow/master.py | 11 +- mitmproxy/models/__init__.py | 4 +- mitmproxy/protocol/rawtcp.py | 4 +- 18 files changed, 389 insertions(+), 356 deletions(-) delete mode 100644 docs/dev/models.rst create mode 100644 docs/scripting/api.rst create mode 100644 docs/scripting/context.rst create mode 100644 docs/scripting/events.rst delete mode 100644 docs/scripting/inlinescripts.rst delete mode 100644 docs/scripting/mitmproxy.rst create mode 100644 docs/scripting/overview.rst diff --git a/docs/_static/theme_overrides.css b/docs/_static/theme_overrides.css index 585fdddb0..849f9f250 100644 --- a/docs/_static/theme_overrides.css +++ b/docs/_static/theme_overrides.css @@ -4,6 +4,10 @@ white-space: normal; } +.wy-table-responsive > table > tbody > tr > td { + vertical-align: top !important; +} + .wy-table-responsive { margin-bottom: 24px; max-width: 100%; @@ -13,3 +17,28 @@ .wy-menu-vertical header, 
.wy-menu-vertical p.caption { color: #e0e0e0; } + +.code-block-caption { + height: 1.5em; +} + +.code-block-caption .caption-text { + font-size: 0.8em; + float: right; +} + +.code-block-caption .headerlink { + display: none !important; +} + +.function .headerlink { + display: none !important; +} + +dl .reference.internal { + display: none !important; +} + +dl .headerlink { + display: none !important; +} diff --git a/docs/conf.py b/docs/conf.py index 54a353acd..76dc83d4d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,11 +1,16 @@ import sys import os +import importlib +import inspect sys.path.insert(0, os.path.abspath('..')) import netlib.version + extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', + 'sphinx.ext.extlinks', + 'sphinx.ext.linkcode', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', 'sphinxcontrib.documentedlist' @@ -156,7 +161,7 @@ html_static_path = ['_static'] #html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +html_show_sourcelink = False # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True @@ -189,5 +194,43 @@ html_static_path = ['_static'] # Output file base name for HTML help builder. htmlhelp_basename = 'mitmproxydoc' + +# FIXME: change master to dynamic version before release +extlinks = dict( + src = ('https://github.com/mitmproxy/mitmproxy/blob/master/%s', '') +) + + +MODULE = "/mitmproxy/" + +def linkcode_resolve(domain, info): + if domain != 'py': + return None + module, fullname = info['module'], info['fullname'] + # TODO: attributes/properties don't have modules, maybe try to look + # them up based on their cached host object? 
+ if not module: + return None + obj = importlib.import_module(module) + for item in fullname.split('.'): + obj = getattr(obj, item, None) + if obj is None: + return None + try: + obj = getattr(obj, '_orig') + except AttributeError: + pass + try: + obj_source_path = inspect.getsourcefile(obj) + _, line = inspect.getsourcelines(obj) + except (TypeError, IOError): + # obj doesn't have a module, or something + return None + off = obj_source_path.rfind(MODULE) + mpath = obj_source_path[off + len(MODULE):] + print(obj_source_path, mpath) + return "https://github.com/mitmproxy/mitmproxy/blob/master/%s" % mpath + + def setup(app): app.add_stylesheet('theme_overrides.css') diff --git a/docs/dev/models.rst b/docs/dev/models.rst deleted file mode 100644 index a333fb06a..000000000 --- a/docs/dev/models.rst +++ /dev/null @@ -1,81 +0,0 @@ -.. _models: - -Datastructures -============== - -.. automodule:: mitmproxy.models - :members: HTTPFlow, HTTPRequest, HTTPResponse - - -.. automodule:: netlib.http - - .. autoclass:: Request - - .. rubric:: Data - .. autoattribute:: first_line_format - .. autoattribute:: method - .. autoattribute:: scheme - .. autoattribute:: host - .. autoattribute:: port - .. autoattribute:: path - .. autoattribute:: http_version - .. autoattribute:: headers - .. autoattribute:: content - .. autoattribute:: timestamp_start - .. autoattribute:: timestamp_end - .. rubric:: Computed Properties and Convenience Methods - .. autoattribute:: text - .. autoattribute:: url - .. autoattribute:: pretty_host - .. autoattribute:: pretty_url - .. autoattribute:: query - .. autoattribute:: cookies - .. autoattribute:: path_components - .. automethod:: anticache - .. automethod:: anticomp - .. automethod:: constrain_encoding - .. autoattribute:: urlencoded_form - .. autoattribute:: multipart_form - - .. autoclass:: Response - - .. automethod:: make - - .. rubric:: Data - .. autoattribute:: http_version - .. autoattribute:: status_code - .. autoattribute:: reason - .. 
autoattribute:: headers - .. autoattribute:: content - .. autoattribute:: timestamp_start - .. autoattribute:: timestamp_end - .. rubric:: Computed Properties and Convenience Methods - .. autoattribute:: text - .. autoattribute:: cookies - - .. autoclass:: Headers - :members: - :special-members: - :no-undoc-members: - -.. automodule:: netlib.multidict - - .. autoclass:: MultiDictView - - .. automethod:: get_all - .. automethod:: set_all - .. automethod:: add - .. automethod:: insert - .. automethod:: keys - .. automethod:: values - .. automethod:: items - .. automethod:: to_dict - -.. autoclass:: mitmproxy.models.Error - :show-inheritance: - -.. autoclass:: mitmproxy.models.ServerConnection - :show-inheritance: - -.. autoclass:: mitmproxy.models.ClientConnection - :show-inheritance: \ No newline at end of file diff --git a/docs/features/clientreplay.rst b/docs/features/clientreplay.rst index 50740bcfa..ebe40b5f8 100644 --- a/docs/features/clientreplay.rst +++ b/docs/features/clientreplay.rst @@ -14,5 +14,5 @@ You may want to use client-side replay in conjunction with the ================== =========== command-line ``-c path`` -mitmproxy shortcut :kbd:`c` +mitmproxy shortcut :kbd:`R` then :kbd:`c` ================== =========== diff --git a/docs/features/responsestreaming.rst b/docs/features/responsestreaming.rst index 66b5cae0e..1d5726c4e 100644 --- a/docs/features/responsestreaming.rst +++ b/docs/features/responsestreaming.rst @@ -35,10 +35,10 @@ command-line ``--stream SIZE`` Customizing Response Streaming ------------------------------ -You can also use an :ref:`inlinescripts` to customize exactly -which responses are streamed. +You can also use a script to customize exactly which responses are streamed. -Responses that should be tagged for streaming by setting their ``.stream`` attribute to ``True``: +Responses that should be tagged for streaming by setting their ``.stream`` +attribute to ``True``: .. 
literalinclude:: ../../examples/stream.py :caption: examples/stream.py diff --git a/docs/features/serverreplay.rst b/docs/features/serverreplay.rst index d70b6514e..f545d4a53 100644 --- a/docs/features/serverreplay.rst +++ b/docs/features/serverreplay.rst @@ -35,5 +35,5 @@ the :kbd:`o` options shortcut within :program:`mitmproxy`. ================== =========== command-line ``-S path`` -mitmproxy shortcut :kbd:`S` +mitmproxy shortcut :kbd:`R` then :kbd:`s` ================== =========== diff --git a/docs/index.rst b/docs/index.rst index 143f79f73..9a948678a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,9 +51,10 @@ :hidden: :caption: Scripting - scripting/inlinescripts - dev/models - scripting/mitmproxy + scripting/overview + scripting/context + scripting/events + scripting/api .. toctree:: diff --git a/docs/scripting/api.rst b/docs/scripting/api.rst new file mode 100644 index 000000000..03d359206 --- /dev/null +++ b/docs/scripting/api.rst @@ -0,0 +1,8 @@ +.. _api: + +API +==== + +.. automodule:: mitmproxy.models.http + :inherited-members: + :members: HTTPFlow, HTTPRequest, HTTPResponse diff --git a/docs/scripting/context.rst b/docs/scripting/context.rst new file mode 100644 index 000000000..7c3515983 --- /dev/null +++ b/docs/scripting/context.rst @@ -0,0 +1,4 @@ +.. _context: + +Context +======= diff --git a/docs/scripting/events.rst b/docs/scripting/events.rst new file mode 100644 index 000000000..c16c01f62 --- /dev/null +++ b/docs/scripting/events.rst @@ -0,0 +1,202 @@ +.. _events: + +Events +======= + +General +------- + +.. list-table:: + :widths: 40 60 + :header-rows: 0 + + * - .. py:function:: configure(options, updated) + - Called once on startup, and whenever options change. + + *options* + An ``options.Options`` object with the total current configuration + state of mitmproxy. + *updated* + A set of strings indicating which configuration options have been + updated. 
This contains all options when *configure* is called on + startup, and only changed options subsequently. + + * - .. py:function:: done() + - Called once when the script shuts down, either because it's been + unloaded, or because the proxy itself is shutting down. + + * - .. py:function:: log(entry) + - Called whenever an event log is added. + + *entry* + A ``controller.LogEntry`` object - ``entry.msg`` is the log text, + and ``entry.level`` is the urgency level ("debug", "info", "warn", + "error"). + + * - .. py:function:: start() + - Called once on startup, before any other events. If you return a + value from this event, it will replace the current addon. This + allows you to "boot into" an addon implemented as a class instance + from the module level. + + * - .. py:function:: tick() + - Called at a regular sub-second interval as long as the addon is + executing. + + +Connection +---------- + +.. list-table:: + :widths: 40 60 + :header-rows: 0 + + * - .. py:function:: clientconnect(root_layer) + - Called when a client initiates a connection to the proxy. Note that a + connection can correspond to multiple HTTP requests. + + *root_layer* + The root layer (see `mitmproxy.protocol` for an explanation of what + the root layer is), provides transparent access to all attributes + of the :py:class:`~mitmproxy.proxy.RootContext`. For example, + ``root_layer.client_conn.address`` gives the remote address of the + connecting client. + + * - .. py:function:: clientdisconnect(root_layer) + - Called when a client disconnects from the proxy. + + *root_layer* + The root layer object. + + * - .. py:function:: next_layer(layer) + + - Called whenever layers are switched. You may change which layer will + be used by returning a new layer object from this event. + + *layer* + The next layer, as determined by mitmproxy. + + * - .. py:function:: serverconnect(server_conn) + - Called before the proxy initiates a connection to the target server. 
+ Note that a connection can correspond to multiple HTTP requests. + + *server_conn* + A ``ServerConnection`` object. It is guaranteed to have a non-None + ``address`` attribute. + + * - .. py:function:: serverdisconnect(server_conn) + - Called when the proxy has closed the server connection. + + *server_conn* + A ``ServerConnection`` object. + + +HTTP Events +----------- + +.. list-table:: + :widths: 40 60 + :header-rows: 0 + + * - .. py:function:: request(flow) + - Called when a client request has been received. + + *flow* + A ``models.HTTPFlow`` object. At this point, the flow is + guaranteed to have a non-None ``request`` attribute. + + * - .. py:function:: requestheaders(flow) + - Called when the headers of a client request have been received, but + before the request body is read. + + *flow* + A ``models.HTTPFlow`` object. At this point, the flow is + guaranteed to have a non-None ``request`` attribute. + + * - .. py:function:: responseheaders(flow) + + - Called when the headers of a server response have been received, but + before the response body is read. + + *flow* + A ``models.HTTPFlow`` object. At this point, the flow is + guaranteed to have non-None ``request`` and ``response`` + attributes; however, the response will have no content. + + * - .. py:function:: response(flow) + + - Called when a server response has been received. + + *flow* + A ``models.HTTPFlow`` object. At this point, the flow is + guaranteed to have non-None ``request`` and ``response`` + attributes. The raw response body will be in ``response.body``, + unless response streaming has been enabled. + + * - .. py:function:: error(flow) + - Called when a flow error has occurred, e.g. invalid server responses, + or interrupted connections. This is distinct from a valid server HTTP + error response, which is simply a response with an HTTP error code. + + *flow* + The flow containing the error. It is guaranteed to have a + non-None ``error`` attribute. 
+ + +WebSocket Events +----------------- + +.. list-table:: + :widths: 40 60 + :header-rows: 0 + + * - .. py:function:: websockets_handshake(flow) + + - Called when a client wants to establish a WebSockets connection. The + WebSockets-specific headers can be modified to manipulate the + handshake. The ``flow`` object is guaranteed to have a non-None + ``request`` attribute. + + *flow* + The flow containing the HTTP websocket handshake request. The + object is guaranteed to have a non-None ``request`` attribute. + + +TCP Events +---------- + +These events are called only if the connection is in :ref:`TCP mode +`. So, for instance, TCP events are not called for ordinary HTTP/S +connections. + +.. list-table:: + :widths: 40 60 + :header-rows: 0 + + * - .. py:function:: tcp_end(flow) + - Called when TCP streaming ends. + + *flow* + A ``models.TCPFlow`` object. + + * - .. py:function:: tcp_error(flow) + - Called when a TCP error occurs - e.g. the connection closing + unexpectedly. + + *flow* + A ``models.TCPFlow`` object. + + * - .. py:function:: tcp_message(flow) + + - Called when a TCP payload is received from the client or server. The + sender and receiver are identifiable. The most recent message will be + ``flow.messages[-1]``. The message is user-modifiable. + + *flow* + A ``models.TCPFlow`` object. + + * - .. py:function:: tcp_start(flow) + - Called when TCP streaming starts. + + *flow* + A ``models.TCPFlow`` object. diff --git a/docs/scripting/inlinescripts.rst b/docs/scripting/inlinescripts.rst deleted file mode 100644 index 74d4e7144..000000000 --- a/docs/scripting/inlinescripts.rst +++ /dev/null @@ -1,227 +0,0 @@ -.. _inlinescripts: - -Inline Scripts -============== - -**mitmproxy** has a powerful scripting API that allows you to modify flows -on-the-fly or rewrite previously saved flows locally. - -The mitmproxy scripting API is event driven - a script is simply a Python -module that exposes a set of event methods. 
Here's a complete mitmproxy script -that adds a new header to every HTTP response before it is returned to the -client: - -.. literalinclude:: ../../examples/add_header.py - :caption: examples/add_header.py - :language: python - -All events that deal with an HTTP request get an instance of :py:class:`~mitmproxy.models.HTTPFlow`, -which we can use to manipulate the response itself. - -We can now run this script using mitmdump or mitmproxy as follows: - ->>> mitmdump -s add_header.py - -The new header will be added to all responses passing through the proxy. - -Examples --------- - -mitmproxy comes with a variety of example inline scripts, which demonstrate many basic tasks. -We encourage you to either browse them locally or on `GitHub`_. - - -Events ------- - -Script Lifecycle Events -^^^^^^^^^^^^^^^^^^^^^^^ - -.. py:function:: start(context) - - Called once on startup, before any other events. - - :param List[str] argv: The inline scripts' arguments. - For example, ``mitmproxy -s 'example.py --foo 42'`` sets argv to ``["--foo", "42"]``. - -.. py:function:: done(context) - - Called once on script shutdown, after any other events. - -Connection Events -^^^^^^^^^^^^^^^^^ - -.. py:function:: clientconnect(context, root_layer) - - Called when a client initiates a connection to the proxy. Note that - a connection can correspond to multiple HTTP requests. - - .. versionchanged:: 0.14 - - :param Layer root_layer: The root layer, which provides transparent access to all attributes of the - :py:class:`~mitmproxy.proxy.RootContext`. For example, ``root_layer.client_conn.address`` - gives the remote address of the connecting client. - -.. py:function:: clientdisconnect(context, root_layer) - - Called when a client disconnects from the proxy. - - .. versionchanged:: 0.14 - - :param Layer root_layer: see :py:func:`clientconnect` - -.. py:function:: serverconnect(context, server_conn) - - Called before the proxy initiates a connection to the target server. 
Note that - a connection can correspond to multiple HTTP requests. - - :param ServerConnection server_conn: The server connection object. It is guaranteed to have a - non-None ``address`` attribute. - -.. py:function:: serverdisconnect(context, server_conn) - - Called when the proxy has closed the server connection. - - .. versionadded:: 0.14 - - :param ServerConnection server_conn: see :py:func:`serverconnect` - -HTTP Events -^^^^^^^^^^^ - -.. py:function:: request(context, flow) - - Called when a client request has been received. The ``flow`` object is - guaranteed to have a non-None ``request`` attribute. - - :param HTTPFlow flow: The flow containing the request which has been received. - The object is guaranteed to have a non-None ``request`` attribute. - -.. py:function:: responseheaders(context, flow) - - Called when the headers of a server response have been received. - This will always be called before the response hook. - - :param HTTPFlow flow: The flow containing the request and response. - The object is guaranteed to have non-None ``request`` and - ``response`` attributes. ``response.content`` will be ``None``, - as the response body has not been read yet. - -.. py:function:: response(context, flow) - - Called when a server response has been received. - - :param HTTPFlow flow: The flow containing the request and response. - The object is guaranteed to have non-None ``request`` and - ``response`` attributes. ``response.body`` will contain the raw response body, - unless response streaming has been enabled. - -.. py:function:: error(context, flow) - - Called when a flow error has occurred, e.g. invalid server responses, or - interrupted connections. This is distinct from a valid server HTTP error - response, which is simply a response with an HTTP error code. - - :param HTTPFlow flow: The flow containing the error. - It is guaranteed to have non-None ``error`` attribute. - -WebSockets Events -^^^^^^^^^^^^^^^^^ - -.. 
py:function:: websocket_handshake(context, flow) - - Called when a client wants to establish a WebSockets connection. - The WebSockets-specific headers can be manipulated to manipulate the handshake. - The ``flow`` object is guaranteed to have a non-None ``request`` attribute. - - :param HTTPFlow flow: The flow containing the request which has been received. - The object is guaranteed to have a non-None ``request`` attribute. - -TCP Events -^^^^^^^^^^ - -.. py:function:: tcp_message(context, tcp_msg) - - .. warning:: API is subject to change - - If the proxy is in :ref:`TCP mode `, this event is called when it - receives a TCP payload from the client or server. - - The sender and receiver are identifiable. The message is user-modifiable. - - :param TcpMessage tcp_msg: see *examples/tcp_message.py* - -API ---- - -The canonical API documentation is the code, which you can browse here, locally or on `GitHub`_. -*Use the Source, Luke!* - -The main classes you will deal with in writing mitmproxy scripts are: - -:py:class:`mitmproxy.flow.FlowMaster` - - The "heart" of mitmproxy, usually subclassed as :py:class:`mitmproxy.dump.DumpMaster` or - :py:class:`mitmproxy.console.ConsoleMaster`. -:py:class:`~mitmproxy.models.ClientConnection` - - Describes a client connection. -:py:class:`~mitmproxy.models.ServerConnection` - - Describes a server connection. -:py:class:`~mitmproxy.models.HTTPFlow` - - A collection of objects representing a single HTTP transaction. -:py:class:`~mitmproxy.models.HTTPRequest` - - An HTTP request. -:py:class:`~mitmproxy.models.HTTPResponse` - - An HTTP response. -:py:class:`~mitmproxy.models.Error` - - A communications error. -:py:class:`netlib.http.Headers` - - A dictionary-like object for managing HTTP headers. -:py:class:`netlib.certutils.SSLCert` - - Exposes information SSL certificates. - - -Running scripts in parallel ---------------------------- - -We have a single flow primitive, so when a script is blocking, other requests are not processed. 
-While that's usually a very desirable behaviour, blocking scripts can be run threaded by using the -:py:obj:`mitmproxy.script.concurrent` decorator. -**If your script does not block, you should avoid the overhead of the decorator.** - -.. literalinclude:: ../../examples/nonblocking.py - :caption: examples/nonblocking.py - :language: python - -Make scripts configurable with arguments ----------------------------------------- - -Sometimes, you want to pass runtime arguments to the inline script. This can be simply done by -surrounding the script call with quotes, e.g. ```mitmdump -s 'script.py --foo 42'``. -The arguments are then exposed in the start event: - -.. literalinclude:: ../../examples/modify_response_body.py - :caption: examples/modify_response_body.py - :language: python - -Running scripts on saved flows ------------------------------- - -Sometimes, we want to run a script on :py:class:`~mitmproxy.models.Flow` objects that are already -complete. This happens when you start a script, and then load a saved set of flows from a file -(see the "scripted data transformation" example :ref:`here `). -It also happens when you run a one-shot script on a single flow through the ``|`` (pipe) shortcut -in mitmproxy. - -In this case, there are no client connections, and the events are run in the following order: -**start**, **request**, **responseheaders**, **response**, **error**, **done**. -If the flow doesn't have a **response** or **error** associated with it, the matching events will -be skipped. - -Spaces in the script path -------------------------- - -By default, spaces are interpreted as a separator between the inline script and its arguments -(e.g. ``-s 'foo.py 42'``). Consequently, the script path needs to be wrapped in a separate pair of -quotes if it contains spaces: ``-s '\'./foo bar/baz.py\' 42'``. - -.. 
_GitHub: https://github.com/mitmproxy/mitmproxy diff --git a/docs/scripting/mitmproxy.rst b/docs/scripting/mitmproxy.rst deleted file mode 100644 index 9e3317360..000000000 --- a/docs/scripting/mitmproxy.rst +++ /dev/null @@ -1,26 +0,0 @@ - -FlowMaster -========== - -.. note:: - - We strongly encourage you to use :ref:`inlinescripts` rather than subclassing mitmproxy's FlowMaster. - - Inline Scripts are equally powerful and provide an easier syntax. - - Most examples are written as inline scripts. - - Multiple inline scripts can be used together. - - Inline Scripts can either be executed headless with mitmdump or within the mitmproxy UI. - - -All of mitmproxy's basic functionality is exposed through the **mitmproxy** -library. The example below shows a simple implementation of the "sticky cookie" -functionality included in the interactive mitmproxy program. Traffic is -monitored for ``Cookie`` and ``Set-Cookie`` headers, and requests are rewritten -to include a previously seen cookie if they don't already have one. In effect, -this lets you log in to a site using your browser, and then make subsequent -requests using a tool like curl, which will then seem to be part of the -authenticated session. - - -.. literalinclude:: ../../examples/stickycookies - :caption: examples/stickycookies - :language: python diff --git a/docs/scripting/overview.rst b/docs/scripting/overview.rst new file mode 100644 index 000000000..a0dfe111c --- /dev/null +++ b/docs/scripting/overview.rst @@ -0,0 +1,79 @@ +.. _overview: + +Overview +========= + +Mitmproxy has a powerful scripting API that allows you to control almost any +aspect of traffic being proxied. In fact, much of mitmproxy's own core +functionality is implemented using the exact same API exposed to scripters (see +:src:`mitmproxy/builtins`). + +Scripting is event driven, with named handlers on the script object called at +appropriate points of mitmproxy's operation. 
Here's a complete mitmproxy script +that adds a new header to every HTTP response before it is returned to the +client: + +.. literalinclude:: ../../examples/add_header.py + :caption: :src:`examples/add_header.py` + :language: python + +All events that deal with an HTTP request get an instance of +:py:class:`~mitmproxy.models.HTTPFlow`, which we can use to manipulate the +response itself. We can now run this script using mitmdump or mitmproxy as +follows: + +>>> mitmdump -s add_header.py + +The new header will be added to all responses passing through the proxy. + + +mitmproxy comes with a variety of example inline scripts, which demonstrate +many basic tasks. + + +Running scripts in parallel +--------------------------- + +We have a single flow primitive, so when a script is blocking, other requests are not processed. +While that's usually a very desirable behaviour, blocking scripts can be run threaded by using the +:py:obj:`mitmproxy.script.concurrent` decorator. +**If your script does not block, you should avoid the overhead of the decorator.** + +.. literalinclude:: ../../examples/nonblocking.py + :caption: examples/nonblocking.py + :language: python + +Make scripts configurable with arguments +---------------------------------------- + +Sometimes, you want to pass runtime arguments to the inline script. This can be done simply by +surrounding the script call with quotes, e.g. ``mitmdump -s 'script.py --foo 42'``. +The arguments are then exposed in the start event: + +.. literalinclude:: ../../examples/modify_response_body.py + :caption: examples/modify_response_body.py + :language: python + + +Running scripts on saved flows +------------------------------ + +Sometimes, we want to run a script on :py:class:`~mitmproxy.models.Flow` objects that are already +complete. This happens when you start a script, and then load a saved set of flows from a file +(see the "scripted data transformation" example :ref:`here `). 
+It also happens when you run a one-shot script on a single flow through the ``|`` (pipe) shortcut +in mitmproxy. + +In this case, there are no client connections, and the events are run in the following order: +**start**, **request**, **responseheaders**, **response**, **error**, **done**. +If the flow doesn't have a **response** or **error** associated with it, the matching events will +be skipped. + +Spaces in the script path +------------------------- + +By default, spaces are interpreted as a separator between the inline script and its arguments +(e.g. ``-s 'foo.py 42'``). Consequently, the script path needs to be wrapped in a separate pair of +quotes if it contains spaces: ``-s '\'./foo bar/baz.py\' 42'``. + +.. _GitHub: https://github.com/mitmproxy/mitmproxy diff --git a/mitmproxy/builtins/filestreamer.py b/mitmproxy/builtins/filestreamer.py index 3436e0760..bf5eedee4 100644 --- a/mitmproxy/builtins/filestreamer.py +++ b/mitmproxy/builtins/filestreamer.py @@ -40,11 +40,11 @@ class FileStreamer: if err: raise exceptions.OptionsError(err) - def tcp_open(self, flow): + def tcp_start(self, flow): if self.stream: self.active_flows.add(flow) - def tcp_close(self, flow): + def tcp_end(self, flow): if self.stream: self.stream.add(flow) self.active_flows.discard(flow) diff --git a/mitmproxy/controller.py b/mitmproxy/controller.py index 1a8801e1f..7b9d460ac 100644 --- a/mitmproxy/controller.py +++ b/mitmproxy/controller.py @@ -19,10 +19,10 @@ Events = frozenset([ "serverconnect", "serverdisconnect", - "tcp_open", + "tcp_start", "tcp_message", "tcp_error", - "tcp_close", + "tcp_end", "request", "requestheaders", diff --git a/mitmproxy/flow/master.py b/mitmproxy/flow/master.py index a2b225b86..a1b0a6311 100644 --- a/mitmproxy/flow/master.py +++ b/mitmproxy/flow/master.py @@ -6,6 +6,7 @@ import sys from typing import Optional # noqa import netlib.exceptions +from netlib import http from mitmproxy import controller from mitmproxy import exceptions from mitmproxy import 
models @@ -29,13 +30,13 @@ def event_sequence(f): messages = f.messages f.messages = [] f.reply = controller.DummyReply() - yield "tcp_open", f + yield "tcp_start", f while messages: f.messages.append(messages.pop(0)) yield "tcp_message", f if f.error: yield "tcp_error", f - yield "tcp_close", f + yield "tcp_end", f else: raise NotImplementedError @@ -83,7 +84,7 @@ class FlowMaster(controller.Master): s = models.ServerConnection.make_dummy((host, port)) f = models.HTTPFlow(c, s) - headers = models.Headers() + headers = http.Headers() req = models.HTTPRequest( "absolute", @@ -261,7 +262,7 @@ class FlowMaster(controller.Master): self.state.update_flow(f) @controller.handler - def tcp_open(self, flow): + def tcp_start(self, flow): # TODO: This would break mitmproxy currently. # self.state.add_flow(flow) pass @@ -275,5 +276,5 @@ class FlowMaster(controller.Master): pass @controller.handler - def tcp_close(self, flow): + def tcp_end(self, flow): pass diff --git a/mitmproxy/models/__init__.py b/mitmproxy/models/__init__.py index 9bd197233..7b7eaef0c 100644 --- a/mitmproxy/models/__init__.py +++ b/mitmproxy/models/__init__.py @@ -4,7 +4,7 @@ from netlib.http import decoded from .connections import ClientConnection, ServerConnection from .flow import Flow, Error from .http import ( - HTTPFlow, HTTPRequest, HTTPResponse, Headers, + HTTPFlow, HTTPRequest, HTTPResponse, make_error_response, make_connect_request, make_connect_response, expect_continue_response ) from .tcp import TCPFlow @@ -15,7 +15,7 @@ FLOW_TYPES = dict( ) __all__ = [ - "HTTPFlow", "HTTPRequest", "HTTPResponse", "Headers", "decoded", + "HTTPFlow", "HTTPRequest", "HTTPResponse", "decoded", "make_error_response", "make_connect_request", "make_connect_response", "expect_continue_response", "ClientConnection", "ServerConnection", diff --git a/mitmproxy/protocol/rawtcp.py b/mitmproxy/protocol/rawtcp.py index 70486cc42..069420ea6 100644 --- a/mitmproxy/protocol/rawtcp.py +++ b/mitmproxy/protocol/rawtcp.py @@ -23,7 
+23,7 @@ class RawTCPLayer(base.Layer): if not self.ignore: flow = models.TCPFlow(self.client_conn, self.server_conn, self) - self.channel.ask("tcp_open", flow) + self.channel.ask("tcp_start", flow) buf = memoryview(bytearray(self.chunk_size)) @@ -64,4 +64,4 @@ class RawTCPLayer(base.Layer): self.channel.tell("tcp_error", flow) finally: if not self.ignore: - self.channel.tell("tcp_close", flow) + self.channel.tell("tcp_end", flow) From c8f2f1019dbd0e07ad6178b68bd32d88fb32a0cb Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Wed, 12 Oct 2016 10:57:40 +1300 Subject: [PATCH 02/12] Clean up models.http a bit - We don't need a deprecation warning here - Bring imports inline with policy --- mitmproxy/models/http.py | 48 ++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/mitmproxy/models/http.py b/mitmproxy/models/http.py index d56eb29af..8c5524e27 100644 --- a/mitmproxy/models/http.py +++ b/mitmproxy/models/http.py @@ -1,30 +1,14 @@ from __future__ import absolute_import, print_function, division import cgi -import warnings -from mitmproxy.models.flow import Flow +from mitmproxy.models import flow +from netlib import http from netlib import version -from netlib.http import Headers -from netlib.http import Request -from netlib.http import Response -from netlib.http import status_codes -from netlib.tcp import Address +from netlib import tcp -class MessageMixin(object): - - def get_decoded_content(self): - """ - Returns the decoded content based on the current Content-Encoding - header. - Doesn't change the message iteself or its headers. - """ - warnings.warn(".get_decoded_content() is deprecated, please use .content directly instead.", DeprecationWarning) - return self.content - - -class HTTPRequest(MessageMixin, Request): +class HTTPRequest(http.Request): """ A mitmproxy HTTP request. 
@@ -49,7 +33,7 @@ class HTTPRequest(MessageMixin, Request): stickycookie=False, stickyauth=False, ): - Request.__init__( + http.Request.__init__( self, first_line_format, method, @@ -110,7 +94,7 @@ class HTTPRequest(MessageMixin, Request): return id(self) -class HTTPResponse(MessageMixin, Response): +class HTTPResponse(http.Response): """ A mitmproxy HTTP response. @@ -129,7 +113,7 @@ class HTTPResponse(MessageMixin, Response): timestamp_end=None, is_replay=False ): - Response.__init__( + http.Response.__init__( self, http_version, status_code, @@ -161,7 +145,7 @@ class HTTPResponse(MessageMixin, Response): return resp -class HTTPFlow(Flow): +class HTTPFlow(flow.Flow): """ A HTTPFlow is a collection of objects representing a single HTTP @@ -188,7 +172,7 @@ class HTTPFlow(Flow): self.response = None """@type: HTTPResponse""" - _stateobject_attributes = Flow._stateobject_attributes.copy() + _stateobject_attributes = flow.Flow._stateobject_attributes.copy() _stateobject_attributes.update( request=HTTPRequest, response=HTTPResponse @@ -225,7 +209,7 @@ class HTTPFlow(Flow): def make_error_response(status_code, message, headers=None): - response = status_codes.RESPONSES.get(status_code, "Unknown") + response = http.status_codes.RESPONSES.get(status_code, "Unknown") body = """ @@ -237,7 +221,7 @@ def make_error_response(status_code, message, headers=None): body = body.encode("utf8", "replace") if not headers: - headers = Headers( + headers = http.Headers( Server=version.MITMPROXY, Connection="close", Content_Length=str(len(body)), @@ -254,10 +238,10 @@ def make_error_response(status_code, message, headers=None): def make_connect_request(address): - address = Address.wrap(address) + address = tcp.Address.wrap(address) return HTTPRequest( "authority", b"CONNECT", None, address.host, address.port, None, b"HTTP/1.1", - Headers(), b"" + http.Headers(), b"" ) @@ -268,8 +252,10 @@ def make_connect_response(http_version): http_version, 200, b"Connection established", - Headers(), 
+ http.Headers(), b"", ) -expect_continue_response = HTTPResponse(b"HTTP/1.1", 100, b"Continue", Headers(), b"") +expect_continue_response = HTTPResponse( + b"HTTP/1.1", 100, b"Continue", http.Headers(), b"" +) From 61040a7bcd46c057e34fe4671ef20b9111649e74 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Fri, 14 Oct 2016 12:18:56 +1300 Subject: [PATCH 03/12] docs: improve external source links, tweak code docs --- docs/conf.py | 29 ++++++++++++++------------- docs/scripting/api.rst | 22 +++++++++++++++++---- mitmproxy/models/http.py | 42 +++++++++++++++++++++------------------- 3 files changed, 54 insertions(+), 39 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 76dc83d4d..ef5f05566 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -195,20 +195,19 @@ html_show_sourcelink = False htmlhelp_basename = 'mitmproxydoc' +SRCBASE = "https://github.com/mitmproxy/mitmproxy/blob/master" + + # FIXME: change master to dynamic version before release extlinks = dict( - src = ('https://github.com/mitmproxy/mitmproxy/blob/master/%s', '') + src = (SRCBASE + r"/%s", '') ) -MODULE = "/mitmproxy/" - def linkcode_resolve(domain, info): if domain != 'py': return None module, fullname = info['module'], info['fullname'] - # TODO: attributes/properties don't have modules, maybe try to look - # them up based on their cached host object? 
if not module: return None obj = importlib.import_module(module) @@ -217,19 +216,19 @@ def linkcode_resolve(domain, info): if obj is None: return None try: - obj = getattr(obj, '_orig') - except AttributeError: - pass - try: - obj_source_path = inspect.getsourcefile(obj) + spath = inspect.getsourcefile(obj) _, line = inspect.getsourcelines(obj) except (TypeError, IOError): - # obj doesn't have a module, or something return None - off = obj_source_path.rfind(MODULE) - mpath = obj_source_path[off + len(MODULE):] - print(obj_source_path, mpath) - return "https://github.com/mitmproxy/mitmproxy/blob/master/%s" % mpath + if spath.rfind("netlib") > -1: + off = spath.rfind("netlib") + mpath = spath[off:] + elif spath.rfind("mitmproxy") > -1: + off = spath.rfind("mitmproxy") + mpath = spath[off:] + else: + return None + return SRCBASE + "/%s#L%s" % (mpath, line) def setup(app): diff --git a/docs/scripting/api.rst b/docs/scripting/api.rst index 03d359206..a17693294 100644 --- a/docs/scripting/api.rst +++ b/docs/scripting/api.rst @@ -1,8 +1,22 @@ .. _api: -API -==== -.. automodule:: mitmproxy.models.http +API +=== + +- HTTP + - `mitmproxy.models.http.HTTPRequest <#mitmproxy.models.http.HTTPRequest>`_ + - `mitmproxy.models.http.HTTPResponse <#mitmproxy.models.http.HTTPResponse>`_ + - `mitmproxy.models.http.HTTPFlow <#mitmproxy.models.http.HTTPFlow>`_ + +HTTP +---- + +.. autoclass:: mitmproxy.models.http.HTTPRequest + :inherited-members: + +.. autoclass:: mitmproxy.models.http.HTTPResponse + :inherited-members: + +.. autoclass:: mitmproxy.models.http.HTTPFlow :inherited-members: - :members: HTTPFlow, HTTPRequest, HTTPResponse diff --git a/mitmproxy/models/http.py b/mitmproxy/models/http.py index 8c5524e27..a76d8b91d 100644 --- a/mitmproxy/models/http.py +++ b/mitmproxy/models/http.py @@ -12,10 +12,11 @@ class HTTPRequest(http.Request): """ A mitmproxy HTTP request. - This is a very thin wrapper on top of :py:class:`netlib.http.Request` and - may be removed in the future. 
""" + # This is a very thin wrapper on top of :py:class:`netlib.http.Request` and + # may be removed in the future. + def __init__( self, first_line_format, @@ -98,9 +99,9 @@ class HTTPResponse(http.Response): """ A mitmproxy HTTP response. - This is a very thin wrapper on top of :py:class:`netlib.http.Response` and - may be removed in the future. """ + # This is a very thin wrapper on top of :py:class:`netlib.http.Response` and + # may be removed in the future. def __init__( self, @@ -148,29 +149,30 @@ class HTTPResponse(http.Response): class HTTPFlow(flow.Flow): """ - A HTTPFlow is a collection of objects representing a single HTTP + An HTTPFlow is a collection of objects representing a single HTTP transaction. - - Attributes: - request: :py:class:`HTTPRequest` object - response: :py:class:`HTTPResponse` object - error: :py:class:`Error` object - server_conn: :py:class:`ServerConnection` object - client_conn: :py:class:`ClientConnection` object - intercepted: Is this flow currently being intercepted? - live: Does this flow have a live client connection? - - Note that it's possible for a Flow to have both a response and an error - object. This might happen, for instance, when a response was received - from the server, but there was an error sending it back to the client. """ def __init__(self, client_conn, server_conn, live=None): super(HTTPFlow, self).__init__("http", client_conn, server_conn, live) + self.request = None - """@type: HTTPRequest""" + """ :py:class:`HTTPRequest` object """ self.response = None - """@type: HTTPResponse""" + """ :py:class:`HTTPResponse` object """ + self.error = None + """ :py:class:`Error` object + + Note that it's possible for a Flow to have both a response and an error + object. This might happen, for instance, when a response was received + from the server, but there was an error sending it back to the client. 
+ """ + self.server_conn = server_conn + """ :py:class:`ServerConnection` object """ + self.client_conn = client_conn + """:py:class:`ClientConnection` object """ + self.intercepted = False + """ Is this flow currently being intercepted? """ _stateobject_attributes = flow.Flow._stateobject_attributes.copy() _stateobject_attributes.update( From fb69c9c3453142ae6beb4040295accb41fdc6878 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 11:12:58 +1300 Subject: [PATCH 04/12] docs: overview, classes, arguments --- docs/scripting/api.rst | 8 ++++ docs/scripting/overview.rst | 72 +++++++++++++++++++++++--------- examples/arguments.py | 17 ++++++++ examples/classes.py | 7 ++++ examples/modify_response_body.py | 21 ---------- mitmproxy/models/flow.py | 1 - test/mitmproxy/test_examples.py | 7 +--- 7 files changed, 86 insertions(+), 47 deletions(-) create mode 100644 examples/arguments.py create mode 100644 examples/classes.py delete mode 100644 examples/modify_response_body.py diff --git a/docs/scripting/api.rst b/docs/scripting/api.rst index a17693294..a864b4423 100644 --- a/docs/scripting/api.rst +++ b/docs/scripting/api.rst @@ -8,6 +8,8 @@ API - `mitmproxy.models.http.HTTPRequest <#mitmproxy.models.http.HTTPRequest>`_ - `mitmproxy.models.http.HTTPResponse <#mitmproxy.models.http.HTTPResponse>`_ - `mitmproxy.models.http.HTTPFlow <#mitmproxy.models.http.HTTPFlow>`_ +- Errors + - `mitmproxy.models.flow.Error <#mitmproxy.models.flow.Error>`_ HTTP ---- @@ -20,3 +22,9 @@ HTTP .. autoclass:: mitmproxy.models.http.HTTPFlow :inherited-members: + +Errors +------ + +.. autoclass:: mitmproxy.models.flow.Error + :inherited-members: diff --git a/docs/scripting/overview.rst b/docs/scripting/overview.rst index a0dfe111c..f8dd9f2e6 100644 --- a/docs/scripting/overview.rst +++ b/docs/scripting/overview.rst @@ -1,13 +1,17 @@ .. 
_overview: -Overview -========= +Introduction +============ Mitmproxy has a powerful scripting API that allows you to control almost any aspect of traffic being proxied. In fact, much of mitmproxy's own core functionality is implemented using the exact same API exposed to scripters (see :src:`mitmproxy/builtins`). + +A simple example +---------------- + Scripting is event driven, with named handlers on the script object called at appropriate points of mitmproxy's operation. Here's a complete mitmproxy script that adds a new header to every HTTP response before it is returned to the @@ -17,18 +21,57 @@ client: :caption: :src:`examples/add_header.py` :language: python -All events that deal with an HTTP request get an instance of -:py:class:`~mitmproxy.models.HTTPFlow`, which we can use to manipulate the -response itself. We can now run this script using mitmdump or mitmproxy as -follows: +All events that deal with an HTTP request get an instance of `HTTPFlow +`_, which we can use to manipulate the +response itself. We can now run this script using mitmdump, and the new header +will be added to all responses passing through the proxy: >>> mitmdump -s add_header.py -The new header will be added to all responses passing through the proxy. + +Using classes +------------- + +In the example above, the script object is the ``add_header`` module itself. +That is, the handlers are declared at the global level of the script. This is +great for quick hacks, but soon becomes limiting as scripts become more +sophisticated. + +When a script first starts up, the `start `_, event is +called before anything else happens. You can replace the current script object +by returning it from this handler. Here's how this looks when applied to the +example above: + +.. literalinclude:: ../../examples/classes.py + :caption: :src:`examples/classes.py` + :language: python + +So here, we're using a module-level script to "boot up" into a class instance. 
+From this point on, the module-level script is removed from the handler chain, +and is replaced by the class instance. -mitmproxy comes with a variety of example inline scripts, which demonstrate -many basic tasks. +Handling arguments +------------------ + +Scripts can handle their own command-line arguments, just like any other Python +program. Let's build on the example above to do something slightly more +sophisticated - replace one value with another in all responses. Mitmproxy's +`HTTPRequest `_ and `HTTPResponse +`_ objects have a handy `replace +`_ method that takes care +of all the details for us. + +.. literalinclude:: ../../examples/arguments.py + :caption: :src:`examples/arguments.py` + :language: python + +We can now call this script on the command-line like this: + +>>> mitmdump -dd -s "./arguments.py html faketml" + +Whenever a handler is called, mitmproxy rewrites the script environment so that +it sees its own arguments as if it was invoked from the command-line. Running scripts in parallel @@ -43,17 +86,6 @@ While that's usually a very desirable behaviour, blocking scripts can be run thr :caption: examples/nonblocking.py :language: python -Make scripts configurable with arguments ----------------------------------------- - -Sometimes, you want to pass runtime arguments to the inline script. This can be simply done by -surrounding the script call with quotes, e.g. ```mitmdump -s 'script.py --foo 42'``. -The arguments are then exposed in the start event: - -..
literalinclude:: ../../examples/modify_response_body.py - :caption: examples/modify_response_body.py - :language: python - Running scripts on saved flows ------------------------------ diff --git a/examples/arguments.py b/examples/arguments.py new file mode 100644 index 000000000..70851192c --- /dev/null +++ b/examples/arguments.py @@ -0,0 +1,17 @@ +import argparse + + +class Replacer: + def __init__(self, src, dst): + self.src, self.dst = src, dst + + def response(self, flow): + flow.response.replace(self.src, self.dst) + + +def start(): + parser = argparse.ArgumentParser() + parser.add_argument("src", type=str) + parser.add_argument("dst", type=str) + args = parser.parse_args() + return Replacer(args.src, args.dst) diff --git a/examples/classes.py b/examples/classes.py new file mode 100644 index 000000000..6443798a9 --- /dev/null +++ b/examples/classes.py @@ -0,0 +1,7 @@ +class AddHeader: + def response(self, flow): + flow.response.headers["newheader"] = "foo" + + +def start(): + return AddHeader() diff --git a/examples/modify_response_body.py b/examples/modify_response_body.py deleted file mode 100644 index b4632248b..000000000 --- a/examples/modify_response_body.py +++ /dev/null @@ -1,21 +0,0 @@ -# Usage: mitmdump -s "modify_response_body.py mitmproxy bananas" -# (this script works best with --anticache) -import sys - - -state = {} - - -def start(): - if len(sys.argv) != 3: - raise ValueError('Usage: -s "modify_response_body.py old new"') - # You may want to use Python's argparse for more sophisticated argument - # parsing. 
- state["old"], state["new"] = sys.argv[1].encode(), sys.argv[2].encode() - - -def response(flow): - flow.response.content = flow.response.content.replace( - state["old"], - state["new"] - ) diff --git a/mitmproxy/models/flow.py b/mitmproxy/models/flow.py index 118aa3d18..165acfe7d 100644 --- a/mitmproxy/models/flow.py +++ b/mitmproxy/models/flow.py @@ -24,7 +24,6 @@ class Error(stateobject.StateObject): Exposes the following attributes: - flow: Flow object msg: Message describing the error timestamp: Seconds since the epoch """ diff --git a/test/mitmproxy/test_examples.py b/test/mitmproxy/test_examples.py index 1ba7ba7a9..483cb2285 100644 --- a/test/mitmproxy/test_examples.py +++ b/test/mitmproxy/test_examples.py @@ -87,11 +87,8 @@ class TestScripts(mastertest.MasterTest): m.request(f) assert f.request.query["mitmproxy"] == "rocks" - def test_modify_response_body(self): - with tutils.raises(ScriptError): - tscript("modify_response_body.py") - - m, sc = tscript("modify_response_body.py", "mitmproxy rocks") + def test_arguments(self): + m, sc = tscript("arguments.py", "mitmproxy rocks") f = tutils.tflow(resp=netutils.tresp(content=b"I <3 mitmproxy")) m.response(f) assert f.response.content == b"I <3 rocks" From dc19ff7a9db611a6626b05b6d0e18992f84b405e Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 11:28:42 +1300 Subject: [PATCH 05/12] mitmdump: show script errors on startup Add the terminal logger before any other addons, so we see script errors on startup. 
--- mitmproxy/dump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mitmproxy/dump.py b/mitmproxy/dump.py index 92dce37b4..70035fb7b 100644 --- a/mitmproxy/dump.py +++ b/mitmproxy/dump.py @@ -37,9 +37,9 @@ class DumpMaster(flow.FlowMaster): def __init__(self, server, options): flow.FlowMaster.__init__(self, options, server, flow.DummyState()) self.has_errored = False - self.addons.add(*builtins.default_addons()) self.addons.add(dumper.Dumper()) self.addons.add(termlog.TermLog()) + self.addons.add(*builtins.default_addons()) # This line is just for type hinting self.options = self.options # type: Options self.set_stream_large_bodies(options.stream_large_bodies) From a6c7a1ff918c5aa0285decb995096190888a2f51 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 11:34:27 +1300 Subject: [PATCH 06/12] scripts: handle SystemExit from scripts explicitly --- mitmproxy/builtins/script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mitmproxy/builtins/script.py b/mitmproxy/builtins/script.py index bbefc5c73..15ee49367 100644 --- a/mitmproxy/builtins/script.py +++ b/mitmproxy/builtins/script.py @@ -88,6 +88,8 @@ def scriptenv(path, args): sys.path.append(script_dir) try: yield + except SystemExit as v: + ctx.log.error("Script exited with code %s" % v.code) except Exception: etype, value, tb = sys.exc_info() tb = cut_traceback(tb, "scriptenv").tb_next From 55cb2a85472de8698b3dabc7ddc920b930e355d9 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 12:03:57 +1300 Subject: [PATCH 07/12] docs: logging and the context --- docs/index.rst | 1 - docs/scripting/api.rst | 20 ++++++++++++----- docs/scripting/context.rst | 4 ---- docs/scripting/overview.rst | 43 +++++++++++++++++++++++++------------ examples/logging.py | 6 ++++++ mitmproxy/controller.py | 21 +++++++++++++++--- 6 files changed, 68 insertions(+), 27 deletions(-) delete mode 100644 docs/scripting/context.rst create mode 100644 examples/logging.py diff --git 
a/docs/index.rst b/docs/index.rst index 9a948678a..cd32a1f68 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -52,7 +52,6 @@ :caption: Scripting scripting/overview - scripting/context scripting/events scripting/api diff --git a/docs/scripting/api.rst b/docs/scripting/api.rst index a864b4423..abc9ff3e3 100644 --- a/docs/scripting/api.rst +++ b/docs/scripting/api.rst @@ -4,12 +4,22 @@ API === +- Errors + - `mitmproxy.models.flow.Error <#mitmproxy.models.flow.Error>`_ - HTTP - `mitmproxy.models.http.HTTPRequest <#mitmproxy.models.http.HTTPRequest>`_ - `mitmproxy.models.http.HTTPResponse <#mitmproxy.models.http.HTTPResponse>`_ - `mitmproxy.models.http.HTTPFlow <#mitmproxy.models.http.HTTPFlow>`_ -- Errors - - `mitmproxy.models.flow.Error <#mitmproxy.models.flow.Error>`_ +- Logging + - `mitmproxy.controller.Log <#mitmproxy.controller.Log>`_ + - `mitmproxy.controller.LogEntry <#mitmproxy.controller.LogEntry>`_ + + +Errors +------ + +.. autoclass:: mitmproxy.models.flow.Error + :inherited-members: HTTP ---- @@ -23,8 +33,8 @@ HTTP .. autoclass:: mitmproxy.models.http.HTTPFlow :inherited-members: -Errors ------- +Logging +-------- -.. autoclass:: mitmproxy.models.flow.Error +.. autoclass:: mitmproxy.controller.Log :inherited-members: diff --git a/docs/scripting/context.rst b/docs/scripting/context.rst deleted file mode 100644 index 7c3515983..000000000 --- a/docs/scripting/context.rst +++ /dev/null @@ -1,4 +0,0 @@ -.. _context: - -Context -======= diff --git a/docs/scripting/overview.rst b/docs/scripting/overview.rst index f8dd9f2e6..a3b83e443 100644 --- a/docs/scripting/overview.rst +++ b/docs/scripting/overview.rst @@ -74,18 +74,24 @@ Whenever a handler is called, mitpmroxy rewrites the script environment so that it sees its own arguments as if it was invoked from the command-line. 
-Running scripts in parallel ---------------------------- +Logging and the context +----------------------- -We have a single flow primitive, so when a script is blocking, other requests are not processed. -While that's usually a very desirable behaviour, blocking scripts can be run threaded by using the -:py:obj:`mitmproxy.script.concurrent` decorator. -**If your script does not block, you should avoid the overhead of the decorator.** +Scripts should not output straight to stderr or stdout. Instead, the `log +`_ object on the ``ctx`` context module +should be used, so that the mitmproxy host program can handle output +appropriately. So, mitmdump can print colorised script output to the terminal, +and mitmproxy console can place script output in the event buffer. -.. literalinclude:: ../../examples/nonblocking.py - :caption: examples/nonblocking.py +Here's how this looks: + +.. literalinclude:: ../../examples/logging.py + :caption: :src:`examples/logging.py` :language: python +The ``ctx`` module also exposes the mitmproxy master object at ``ctx.master`` +for advanced usage. + Running scripts on saved flows ------------------------------ @@ -101,11 +107,20 @@ In this case, there are no client connections, and the events are run in the fol If the flow doesn't have a **response** or **error** associated with it, the matching events will be skipped. -Spaces in the script path -------------------------- -By default, spaces are interpreted as a separator between the inline script and its arguments -(e.g. ``-s 'foo.py 42'``). Consequently, the script path needs to be wrapped in a separate pair of -quotes if it contains spaces: ``-s '\'./foo bar/baz.py\' 42'``. +Concurrency +----------- -.. _GitHub: https://github.com/mitmproxy/mitmproxy +We have a single flow primitive, so when a script is blocking, other requests +are not processed.
While that's usually a very desirable behaviour, blocking +scripts can be run threaded by using the :py:obj:`mitmproxy.script.concurrent` +decorator. + +.. literalinclude:: ../../examples/nonblocking.py + :caption: :src:`examples/nonblocking.py` + :language: python + + + +Developing scripts +------------------ diff --git a/examples/logging.py b/examples/logging.py new file mode 100644 index 000000000..dccfd8b2f --- /dev/null +++ b/examples/logging.py @@ -0,0 +1,6 @@ +from mitmproxy import ctx + + +def start(): + ctx.log.info("This is some informative text.") + ctx.log.error("This is an error.") diff --git a/mitmproxy/controller.py b/mitmproxy/controller.py index 7b9d460ac..4fd66bfab 100644 --- a/mitmproxy/controller.py +++ b/mitmproxy/controller.py @@ -49,24 +49,39 @@ class LogEntry(object): class Log(object): + """ + The central logger, exposed to scripts as mitmproxy.ctx.log. + """ def __init__(self, master): self.master = master - def __call__(self, text, level="info"): - self.master.add_log(text, level) - def debug(self, txt): + """ + Log with level debug. + """ self(txt, "debug") def info(self, txt): + """ + Log with level info. + """ self(txt, "info") def warn(self, txt): + """ + Log with level warn. + """ self(txt, "warn") def error(self, txt): + """ + Log with level error. 
+ """ self(txt, "error") + def __call__(self, text, level="info"): + self.master.add_log(text, level) + class Master(object): """ From 072fff90f119375395a9b1b2fbef9667a46f7236 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 15 Oct 2016 18:00:21 -0700 Subject: [PATCH 08/12] docs: link to correct tag --- docs/conf.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index ef5f05566..e1cbc4976 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,7 +1,9 @@ -import sys -import os import importlib import inspect +import os +import subprocess +import sys + sys.path.insert(0, os.path.abspath('..')) import netlib.version @@ -194,11 +196,20 @@ html_show_sourcelink = False # Output file base name for HTML help builder. htmlhelp_basename = 'mitmproxydoc' +last_tag, tag_dist, commit = ( + subprocess.check_output(["git", "describe", "--tags", "--long"]) + .decode() + .strip() + .rsplit("-", 2) +) +tag_dist = int(tag_dist) +if tag_dist == 0: + tag = last_tag +else: + tag = "master" -SRCBASE = "https://github.com/mitmproxy/mitmproxy/blob/master" +SRCBASE = "https://github.com/mitmproxy/mitmproxy/blob/{}".format(tag) - -# FIXME: change master to dynamic version before release extlinks = dict( src = (SRCBASE + r"/%s", '') ) From 97b594b84811da7bd90a615752c47c8982c1303c Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 16:34:04 +1300 Subject: [PATCH 09/12] mitmdump: fix addon order - dumper must be last This is so we can see the effects of script rewriting using -dd. 
--- mitmproxy/dump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mitmproxy/dump.py b/mitmproxy/dump.py index 70035fb7b..c25d93f8f 100644 --- a/mitmproxy/dump.py +++ b/mitmproxy/dump.py @@ -37,9 +37,9 @@ class DumpMaster(flow.FlowMaster): def __init__(self, server, options): flow.FlowMaster.__init__(self, options, server, flow.DummyState()) self.has_errored = False - self.addons.add(dumper.Dumper()) self.addons.add(termlog.TermLog()) self.addons.add(*builtins.default_addons()) + self.addons.add(dumper.Dumper()) # This line is just for type hinting self.options = self.options # type: Options self.set_stream_large_bodies(options.stream_large_bodies) From 57b8ed21a9a30eb79d9340d5e146e42bbafd0d46 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 18:25:59 +1300 Subject: [PATCH 10/12] docs: scripts on saved flows --- docs/scripting/overview.rst | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/docs/scripting/overview.rst b/docs/scripting/overview.rst index a3b83e443..744f5eb43 100644 --- a/docs/scripting/overview.rst +++ b/docs/scripting/overview.rst @@ -1,7 +1,7 @@ .. _overview: -Introduction -============ +Overview +======== Mitmproxy has a powerful scripting API that allows you to control almost any aspect of traffic being proxied. In fact, much of mitmproxy's own core @@ -96,11 +96,27 @@ for advanced usage. Running scripts on saved flows ------------------------------ -Sometimes, we want to run a script on :py:class:`~mitmproxy.models.Flow` objects that are already -complete. This happens when you start a script, and then load a saved set of flows from a file -(see the "scripted data transformation" example :ref:`here `). -It also happens when you run a one-shot script on a single flow through the ``|`` (pipe) shortcut -in mitmproxy. +When a flow is loaded from disk, the sequence of events that the flow would +have gone through on the wire is partially replayed. 
So, for instance, an HTTP +flow loaded from disk will trigger `requestheaders +`_, `request `_, +`responseheaders `_ and `response +`_ in order. We can use this behaviour to transform saved +traffic using scripts. For example, we can invoke the replacer script from +above on saved traffic as follows: + +>>> mitmdump -dd -s "./arguments.py html faketml" + + + + + +:py:class:`~mitmproxy.models.Flow` +objects that are already complete. This happens when you start a script, and +then load a saved set of flows from a file (see the "scripted data +transformation" example :ref:`here `). It also happens when you run a +one-shot script on a single flow through the ``|`` (pipe) shortcut in +mitmproxy. In this case, there are no client connections, and the events are run in the following order: **start**, **request**, **responseheaders**, **response**, **error**, **done**. From 9a0195bf64e746d5932125122bd56ec150f928bf Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 18:48:22 +1300 Subject: [PATCH 11/12] scripts: keep scripts just after the ScriptLoader in addon chain We need scripts to run _before_ filestreamer, so we can't just add them to the end of the chain. This patch also fixes an issue that could cause scripts to be initialised un-necessarily if only the order of scripts in options changed. --- mitmproxy/addons.py | 26 +++++++++++++++++++------- mitmproxy/builtins/script.py | 23 +++++++++++++++++++++-- test/mitmproxy/builtins/test_script.py | 8 ++------ test/mitmproxy/test_addons.py | 2 +- 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/mitmproxy/addons.py b/mitmproxy/addons.py index db126dd9a..b575b6071 100644 --- a/mitmproxy/addons.py +++ b/mitmproxy/addons.py @@ -28,20 +28,32 @@ class Addons(object): with self.master.handlecontext(): i.configure(options, updated) + def startup(self, s): + """ + Run startup events on addon. 
+ """ + self.invoke_with_context(s, "start") + self.invoke_with_context( + s, + "configure", + self.master.options, + self.master.options.keys() + ) + def add(self, *addons): + """ + Add addons to the end of the chain, and run their startup events. + """ if not addons: raise ValueError("No addons specified.") self.chain.extend(addons) for i in addons: - self.invoke_with_context(i, "start") - self.invoke_with_context( - i, - "configure", - self.master.options, - self.master.options.keys() - ) + self.startup(i) def remove(self, addon): + """ + Remove an addon from the chain, and run its done events. + """ self.chain = [i for i in self.chain if i is not addon] self.invoke_with_context(addon, "done") diff --git a/mitmproxy/builtins/script.py b/mitmproxy/builtins/script.py index 15ee49367..5c375d27d 100644 --- a/mitmproxy/builtins/script.py +++ b/mitmproxy/builtins/script.py @@ -239,16 +239,35 @@ class ScriptLoader(): ctx.log.info("Un-loading script: %s" % a.name) ctx.master.addons.remove(a) + # The machinations below are to ensure that: + # - Scripts remain in the same order + # - Scripts are listed directly after the script addon. This is + # needed to ensure that interactions with, for instance, flow + # serialization remains correct. + # - Scripts are not initialized un-necessarily. If only a + # script's order in the script list has changed, it should simply + # be moved. 
+ current = {} for a in ctx.master.addons.chain[:]: if isinstance(a, Script): current[a.name] = a ctx.master.addons.chain.remove(a) + ordered = [] + newscripts = [] for s in options.scripts: if s in current: - ctx.master.addons.chain.append(current[s]) + ordered.append(current[s]) else: ctx.log.info("Loading script: %s" % s) sc = Script(s) - ctx.master.addons.add(sc) + ordered.append(sc) + newscripts.append(sc) + + ochain = ctx.master.addons.chain + pos = ochain.index(self) + ctx.master.addons.chain = ochain[:pos+1] + ordered + ochain[pos+1:] + + for s in newscripts: + ctx.master.addons.startup(s) diff --git a/test/mitmproxy/builtins/test_script.py b/test/mitmproxy/builtins/test_script.py index 261adb659..544513130 100644 --- a/test/mitmproxy/builtins/test_script.py +++ b/test/mitmproxy/builtins/test_script.py @@ -237,12 +237,8 @@ class TestScriptLoader(mastertest.MasterTest): "%s %s" % (rec, "b"), ] debug = [(i[0], i[1]) for i in m.event_log if i[0] == "debug"] - assert debug == [ - ('debug', 'c configure'), - ('debug', 'a configure'), - ('debug', 'b configure'), - ] - m.event_log[:] = [] + # No events, only order has changed + assert debug == [] o.scripts = [ "%s %s" % (rec, "x"), diff --git a/test/mitmproxy/test_addons.py b/test/mitmproxy/test_addons.py index 52d7f07f3..c5d54e8c5 100644 --- a/test/mitmproxy/test_addons.py +++ b/test/mitmproxy/test_addons.py @@ -16,6 +16,6 @@ def test_simple(): o = options.Options() m = controller.Master(o) a = addons.Addons(m) - a.add(o, TAddon("one")) + a.add(TAddon("one")) assert a.get("one") assert not a.get("two") From 00603021d9d486e3e16511eee273d26f59a3ab10 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sun, 16 Oct 2016 20:21:43 +1300 Subject: [PATCH 12/12] docs: concurrency, developing scripts --- docs/scripting/overview.rst | 39 ++++++++++++++++++------------------ mitmproxy/builtins/script.py | 2 +- release/README.mkd | 11 ---------- 3 files changed, 20 insertions(+), 32 deletions(-) diff --git 
a/docs/scripting/overview.rst b/docs/scripting/overview.rst index 744f5eb43..5966eb1de 100644 --- a/docs/scripting/overview.rst +++ b/docs/scripting/overview.rst @@ -105,38 +105,37 @@ flow loaded from disk will trigger `requestheaders traffic using scripts. For example, we can invoke the replacer script from above on saved traffic as follows: ->>> mitmdump -dd -s "./arguments.py html faketml" +>>> mitmdump -dd -s "./arguments.py html fakehtml" -r saved -w changed +This command starts the ``arguments`` script, reads all the flows from +``saved`` transforming them in the process, then writes them all to +``changed``. - - - -:py:class:`~mitmproxy.models.Flow` -objects that are already complete. This happens when you start a script, and -then load a saved set of flows from a file (see the "scripted data -transformation" example :ref:`here `). It also happens when you run a -one-shot script on a single flow through the ``|`` (pipe) shortcut in -mitmproxy. - -In this case, there are no client connections, and the events are run in the following order: -**start**, **request**, **responseheaders**, **response**, **error**, **done**. -If the flow doesn't have a **response** or **error** associated with it, the matching events will -be skipped. +The mitmproxy console tool provides interactive ways to run transforming +scripts on flows - for instance, you can run a one-shot script on a single flow +through the ``|`` (pipe) shortcut. Concurrency ----------- -We have a single flow primitive, so when a script is blocking, other requests -are not processed. While that's usually a very desirable behaviour, blocking -scripts can be run threaded by using the :py:obj:`mitmproxy.script.concurrent` -decorator. +The mitmproxy script mechanism is single threaded, and the proxy blocks while +script handlers execute. This hugely simplifies the most common case, where +handlers are light-weight and the blocking doesn't have a performance impact. 
+It's possible to implement a concurrent mechanism on top of the blocking +framework, and mitmproxy includes a handy example of this that is fit for most +purposes. You can use it as follows: .. literalinclude:: ../../examples/nonblocking.py :caption: :src:`examples/nonblocking.py` :language: python - Developing scripts ------------------ + +Mitmproxy monitors scripts for modifications, and reloads them on change. When +this happens, the script is shut down (the `done `_ event is +called), and the new instance is started up as if the script had just been +loaded (the `start `_ and `configure +`_ events are called). diff --git a/mitmproxy/builtins/script.py b/mitmproxy/builtins/script.py index 5c375d27d..9bf25703e 100644 --- a/mitmproxy/builtins/script.py +++ b/mitmproxy/builtins/script.py @@ -267,7 +267,7 @@ class ScriptLoader(): ochain = ctx.master.addons.chain pos = ochain.index(self) - ctx.master.addons.chain = ochain[:pos+1] + ordered + ochain[pos+1:] + ctx.master.addons.chain = ochain[:pos + 1] + ordered + ochain[pos + 1:] for s in newscripts: ctx.master.addons.startup(s) diff --git a/release/README.mkd b/release/README.mkd index 7754125d0..8d63b8f33 100644 --- a/release/README.mkd +++ b/release/README.mkd @@ -1,15 +1,4 @@ -General build and release utilities for the mitmproxy, netlib and pathod -projects. These tools assume a directory structure with all repositories at the -same level, for example: - - /src - /mitmproxy - /netlib - /pathod - /release - - # Release policies - By default, every release is a new minor (`0.x`) release and it will be