From 14457f29b3d89e234d0791c4980e5cf9514185dd Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Thu, 3 Sep 2015 18:55:38 +0200 Subject: [PATCH] docs++ --- libmproxy/exceptions.py | 11 +++ libmproxy/protocol/__init__.py | 27 ++++++++ libmproxy/protocol/base.py | 116 +++++++++++++++++++------------- libmproxy/protocol/tls.py | 4 +- libmproxy/proxy/root_context.py | 22 ++++-- 5 files changed, 127 insertions(+), 53 deletions(-) diff --git a/libmproxy/exceptions.py b/libmproxy/exceptions.py index f34d97078..59436df75 100644 --- a/libmproxy/exceptions.py +++ b/libmproxy/exceptions.py @@ -1,9 +1,20 @@ +""" +We try to be very hygienic regarding the exceptions we throw: +Every Exception mitmproxy raises shall be a subclass of ProxyException. + + +See also: http://lucumr.pocoo.org/2014/10/16/on-error-handling/ +""" from __future__ import (absolute_import, print_function, division) class ProxyException(Exception): """ Base class for all exceptions thrown by libmproxy. + + Args: + message: the error message + cause: (optional) an error object that caused this exception, e.g. an IOError. """ def __init__(self, message, cause=None): """ diff --git a/libmproxy/protocol/__init__.py b/libmproxy/protocol/__init__.py index b0e66dbd7..35d59f287 100644 --- a/libmproxy/protocol/__init__.py +++ b/libmproxy/protocol/__init__.py @@ -1,3 +1,30 @@ +""" +In mitmproxy, protocols are implemented as a set of layers, which are composed on top of each other. +The first layer is usually the proxy mode, e.g. transparent proxy or normal HTTP proxy. Next, +various protocol layers are stacked on top of each other - imagine WebSockets on top of an HTTP +Upgrade request. 
An actual mitmproxy connection may look as follows (outermost layer first): + + Transparent HTTP proxy, no TLS: + - TransparentProxy + - Http1Layer + - HttpLayer + + Regular proxy, CONNECT request with WebSockets over SSL: + - HttpProxy + - Http1Layer + - HttpLayer + - TLSLayer + - WebsocketLayer (or TCPLayer) + +Every layer acts as a read-only context for its inner layers (see :py:class:`Layer`). To communicate +with an outer layer, a layer can use functions provided in the context. The next layer is always +determined by a call to :py:meth:`~libmproxy.proxy.RootContext.next_layer`, +which is provided by the root context. + +Another subtle design goal of this architecture is that upstream connections should be established +as late as possible; this makes server replay without any outgoing connections possible. +""" + from __future__ import (absolute_import, print_function, division) from .base import Layer, ServerConnectionMixin, Kill from .http import Http1Layer, Http2Layer diff --git a/libmproxy/protocol/base.py b/libmproxy/protocol/base.py index f27cb04be..9d8c8bfe4 100644 --- a/libmproxy/protocol/base.py +++ b/libmproxy/protocol/base.py @@ -1,37 +1,3 @@ -""" -mitmproxy protocol architecture - -In mitmproxy, protocols are implemented as a set of layers, which are composed on top each other. -For example, the following scenarios depict possible settings (lowest layer first): - -Transparent HTTP proxy, no SSL: - TransparentProxy - Http1Layer - HttpLayer - -Regular proxy, CONNECT request with WebSockets over SSL: - HttpProxy - Http1Layer - HttpLayer - SslLayer - WebsocketLayer (or TcpLayer) - -Automated protocol detection by peeking into the buffer: - TransparentProxy - TLSLayer - Http2Layer - HttpLayer - -Communication between layers is done as follows: - - lower layers provide context information to higher layers - - higher layers can call functions provided by lower layers, - which are propagated until they reach a suitable layer. 
- -Further goals: - - Connections should always be peekable to make automatic protocol detection work. - - Upstream connections should be established as late as possible; - inline scripts shall have a chance to handle everything locally. -""" from __future__ import (absolute_import, print_function, division) from netlib import tcp from ..models import ServerConnection @@ -43,8 +9,8 @@ class _LayerCodeCompletion(object): Dummy class that provides type hinting in PyCharm, which simplifies development a lot. """ - def __init__(self, *args, **kwargs): # pragma: nocover - super(_LayerCodeCompletion, self).__init__(*args, **kwargs) + def __init__(self, **mixin_args): # pragma: nocover + super(_LayerCodeCompletion, self).__init__(**mixin_args) if True: return self.config = None @@ -55,34 +21,64 @@ class _LayerCodeCompletion(object): """@type: libmproxy.models.ServerConnection""" self.channel = None """@type: libmproxy.controller.Channel""" + self.ctx = None + """@type: libmproxy.protocol.Layer""" class Layer(_LayerCodeCompletion): - def __init__(self, ctx, *args, **kwargs): + """ + Base class for all layers. All other protocol layers should inherit from this class. + """ + + def __init__(self, ctx, **mixin_args): """ + Each layer usually passes itself to its child layers as a context. Properties of the + context are transparently mapped to the layer, so that the following works: + + .. code-block:: python + + root_layer = Layer(None) + root_layer.client_conn = 42 + sub_layer = Layer(root_layer) + print(sub_layer.client_conn) # 42 + + The root layer is passed a :py:class:`libmproxy.proxy.RootContext` object, + which provides access to :py:attr:`~libmproxy.proxy.RootContext.client_conn`, + :py:attr:`~libmproxy.proxy.RootContext.next_layer` and other basic attributes. + Args: - ctx: The (read-only) higher layer. + ctx: The (read-only) parent layer / context. """ self.ctx = ctx - """@type: libmproxy.protocol.Layer""" - super(Layer, self).__init__(*args, **kwargs) + """ + The parent layer. 
+ + :type: :py:class:`Layer` + """ + super(Layer, self).__init__(**mixin_args) def __call__(self): - """ - Logic of the layer. + """Logic of the layer. + + Returns: + Once the protocol has finished without exceptions. + Raises: - ProtocolException in case of protocol exceptions. + ~libmproxy.exceptions.ProtocolException: if an exception occurs. No other exceptions must be raised. """ raise NotImplementedError() def __getattr__(self, name): """ - Attributes not present on the current layer may exist on a higher layer. + Attributes not present on the current layer are looked up on the context. """ return getattr(self.ctx, name) @property def layers(self): + """ + List of all layers, including the current layer (``[self, self.ctx, self.ctx.ctx, ...]``) + """ return [self] + self.ctx.layers def __repr__(self): @@ -92,6 +88,20 @@ class Layer(_LayerCodeCompletion): class ServerConnectionMixin(object): """ Mixin that provides a layer with the capabilities to manage a server connection. + The server address can be passed in the constructor or set by calling :py:meth:`set_server`. + Subclasses are responsible for calling :py:meth:`disconnect` before returning. + + Recommended Usage: + + .. code-block:: python + + class MyLayer(Layer, ServerConnectionMixin): + def __call__(self): + try: + # Do something. + finally: + if self.server_conn: + self.disconnect() """ def __init__(self, server_address=None): @@ -117,6 +127,14 @@ class ServerConnectionMixin(object): ) def set_server(self, address, server_tls=None, sni=None): + """ + Sets a new server address. If there is an existing connection, it will be closed. + + Raises: + ~libmproxy.exceptions.ProtocolException: + if ``server_tls`` is ``True``, but there was no TLS layer on the + protocol stack which could have processed this. 
+ """ if self.server_conn: self.disconnect() self.log("Set new server address: " + repr(address), "debug") @@ -130,6 +148,7 @@ class ServerConnectionMixin(object): def disconnect(self): """ Deletes (and closes) an existing server connection. + Must not be called if there is no existing connection. """ self.log("serverdisconnect", "debug", [repr(self.server_conn.address)]) address = self.server_conn.address @@ -139,6 +158,13 @@ class ServerConnectionMixin(object): self.server_conn = ServerConnection(address) def connect(self): + """ + Establishes a server connection. + Must not be called if there is an existing connection. + + Raises: + ~libmproxy.exceptions.ProtocolException: if the connection could not be established. + """ if not self.server_conn.address: raise ProtocolException("Cannot connect to server, no server address given.") self.log("serverconnect", "debug", [repr(self.server_conn.address)]) @@ -152,5 +178,5 @@ class ServerConnectionMixin(object): class Kill(Exception): """ - Kill a connection. + Signal that both client and server connection(s) should be killed immediately. """ diff --git a/libmproxy/protocol/tls.py b/libmproxy/protocol/tls.py index a62b1a22f..88a8398f1 100644 --- a/libmproxy/protocol/tls.py +++ b/libmproxy/protocol/tls.py @@ -237,8 +237,8 @@ class TlsLayer(Layer): If so, we first connect to the server and then to the client. If not, we only connect to the client and do the server_ssl lazily on a Connect message. - An additional complexity is that establish ssl with the server may require a SNI value from the client. - In an ideal world, we'd do the following: + An additional complexity is that establish ssl with the server may require a SNI value from + the client. In an ideal world, we'd do the following: 1. Start the SSL handshake with the client 2. Check if the client sends a SNI. 3. Pause the client handshake, establish SSL with the server. 
diff --git a/libmproxy/proxy/root_context.py b/libmproxy/proxy/root_context.py index 88df8e47d..87a540c0b 100644 --- a/libmproxy/proxy/root_context.py +++ b/libmproxy/proxy/root_context.py @@ -11,21 +11,31 @@ from .modes import HttpProxy, HttpUpstreamProxy, ReverseProxy class RootContext(object): """ - The outmost context provided to the root layer. - As a consequence, every layer has .client_conn, .channel, .next_layer() and .config. + The outermost context provided to the root layer. + As a consequence, every layer has access to methods and attributes defined here. + + Attributes: + client_conn: + The client connection. + channel: + A :py:class:`~libmproxy.controller.Channel` to communicate with the FlowMaster. + Provides :py:meth:`~libmproxy.controller.Channel.ask` and + :py:meth:`~libmproxy.controller.Channel.tell` methods. + config: + The proxy server's configuration. """ def __init__(self, client_conn, config, channel): - self.client_conn = client_conn # Client Connection - self.channel = channel # provides .ask() method to communicate with FlowMaster - self.config = config # Proxy Configuration + self.client_conn = client_conn + self.channel = channel + self.config = config def next_layer(self, top_layer): """ This function determines the next layer in the protocol stack. Arguments: - top_layer: the current top layer. + top_layer: the current innermost layer. Returns: The next layer