diff --git a/docs/features/passthrough.rst b/docs/features/passthrough.rst index d68a49a99..00462e9d9 100644 --- a/docs/features/passthrough.rst +++ b/docs/features/passthrough.rst @@ -16,7 +16,7 @@ mechanism: If you want to peek into (SSL-protected) non-HTTP connections, check out the :ref:`tcpproxy` feature. If you want to ignore traffic from mitmproxy's processing because of large response bodies, -take a look at the :ref:`responsestreaming` feature. +take a look at the :ref:`streaming` feature. How it works ------------ @@ -89,7 +89,7 @@ Here are some other examples for ignore patterns: .. seealso:: - :ref:`tcpproxy` - - :ref:`responsestreaming` + - :ref:`streaming` - mitmproxy's "Limit" feature .. rubric:: Footnotes diff --git a/docs/features/responsestreaming.rst b/docs/features/responsestreaming.rst deleted file mode 100644 index 6fa93271d..000000000 --- a/docs/features/responsestreaming.rst +++ /dev/null @@ -1,68 +0,0 @@ -.. _responsestreaming: - -Response Streaming -================== - -By using mitmproxy's streaming feature, response contents can be passed to the client incrementally -before they have been fully received by the proxy. This is especially useful for large binary files -such as videos, where buffering the whole file slows down the client's browser. - -By default, mitmproxy will read the entire response, perform any indicated -manipulations on it and then send the (possibly modified) response to -the client. In some cases this is undesirable and you may wish to "stream" -the response back to the client. When streaming is enabled, the response is -not buffered on the proxy but directly sent back to the client instead. - -On the command-line -------------------- - -Streaming can be enabled on the command line for all response bodies exceeding a certain size. -The SIZE argument understands k/m/g suffixes, e.g. 3m for 3 megabytes. - -================== ================= -command-line ``--stream SIZE`` -================== ================= - -.. warning:: - - When response streaming is enabled, **streamed response contents will not be - recorded or preserved in any way.** - -.. note:: - - When response streaming is enabled, the response body cannot be modified by the usual means. - -Customizing Response Streaming ------------------------------- - -You can also use a script to customize exactly which responses are streamed. - -Responses that should be tagged for streaming by setting their ``.stream`` -attribute to ``True``: - -.. literalinclude:: ../../examples/complex/stream.py - :caption: examples/complex/stream.py - :language: python - -Implementation Details ----------------------- - -When response streaming is enabled, portions of the code which would have otherwise performed -changes on the response body will see an empty response body. Any modifications will be ignored. - -Streamed responses are usually sent in chunks of 4096 bytes. If the response is sent with a -``Transfer-Encoding: chunked`` header, the response will be streamed one chunk at a time. - -Modifying streamed data ------------------------ - -If the ``.stream`` attribute is callable, ``.stream`` will wrap the generator that yields all -chunks. - -.. literalinclude:: ../../examples/complex/stream_modify.py - :caption: examples/complex/stream_modify.py - :language: python - -.. seealso:: - - - :ref:`passthrough` diff --git a/docs/features/streaming.rst b/docs/features/streaming.rst new file mode 100644 index 000000000..82510843b --- /dev/null +++ b/docs/features/streaming.rst @@ -0,0 +1,102 @@ +.. _streaming: + +HTTP Streaming +============== + +By default, mitmproxy will read the entire request/response, perform any indicated +manipulations on it and then send the (possibly modified) message to +the other party. In some cases this is undesirable and you may wish to "stream" +the request/response. When streaming is enabled, the request/response is +not buffered on the proxy but directly sent to the server/client instead. +HTTP headers are still fully buffered before being sent. + +Request Streaming +----------------- + +Request streaming can be used to incrementally stream a request body to the server +before it has been fully received by the proxy. This is useful for large file uploads. + +Response Streaming +------------------ + +By using mitmproxy's streaming feature, response contents can be passed to the client incrementally +before they have been fully received by the proxy. This is especially useful for large binary files +such as videos, where buffering the whole file slows down the client's browser. + +On the command-line +------------------- + +Streaming can be enabled on the command line for all request and response bodies exceeding a certain size. +The SIZE argument understands k/m/g suffixes, e.g. 3m for 3 megabytes. + +================== ================= +command-line ``--set stream_large_bodies=SIZE`` +================== ================= + +.. warning:: + + When streaming is enabled, **streamed request/response contents will not be + recorded or preserved in any way.** + +.. note:: + + When streaming is enabled, the request/response body cannot be modified by the usual means. + +Customizing Streaming +--------------------- + +You can also use a script to customize exactly which requests or responses are streamed. + +Requests/Responses that should be tagged for streaming by setting their ``.stream`` +attribute to ``True``: + +.. literalinclude:: ../../examples/complex/stream.py + :caption: examples/complex/stream.py + :language: python + +Implementation Details +---------------------- + +When response streaming is enabled, portions of the code which would have otherwise performed +changes on the request/response body will see an empty body. Any modifications will be ignored. + +Streamed bodies are usually sent in chunks of 4096 bytes. If the response is sent with a +``Transfer-Encoding: chunked`` header, the response will be streamed one chunk at a time. + +Modifying streamed data +----------------------- + +If the ``.stream`` attribute is callable, ``.stream`` will wrap the generator that yields all +chunks. + +.. literalinclude:: ../../examples/complex/stream_modify.py + :caption: examples/complex/stream_modify.py + :language: python + +WebSocket Streaming +=================== + +The WebSocket streaming feature can be used to send the frames as soon as they arrive. This can be useful for large binary file transfers. + +On the command-line +------------------- + +Streaming can be enabled on the command line for all WebSocket frames + +================== ================= +command-line ``--set stream_websockets=true`` +================== ================= + +.. note:: + + When Web Socket streaming is enabled, the message payload cannot be modified. + +Implementation Details +---------------------- +When WebSocket streaming is enabled, portions of the code which may perform changes to the WebSocket message payloads will not have +any effect on the actual payload sent to the server as the frames are immediately forwarded to the server. +In contrast to HTTP streaming, where the body is not stored, the message payload will still be stored in the WebSocket Flow. + +.. seealso:: + + - :ref:`passthrough` diff --git a/docs/features/tcpproxy.rst b/docs/features/tcpproxy.rst index 0825c0249..cba374e3d 100644 --- a/docs/features/tcpproxy.rst +++ b/docs/features/tcpproxy.rst @@ -28,4 +28,4 @@ feature. .. seealso:: - :ref:`passthrough` - - :ref:`responsestreaming` + - :ref:`streaming` diff --git a/docs/index.rst b/docs/index.rst index a4e37e713..7cf593ff2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,7 +33,7 @@ features/passthrough features/proxyauth features/reverseproxy - features/responsestreaming + features/streaming features/socksproxy features/sticky features/tcpproxy diff --git a/examples/complex/stream.py b/examples/complex/stream.py index 1993cf7f3..ae365ec5b 100644 --- a/examples/complex/stream.py +++ b/examples/complex/stream.py @@ -1,6 +1,6 @@ def responseheaders(flow): """ Enables streaming for all responses. - This is equivalent to passing `--stream 0` to mitmproxy. + This is equivalent to passing `--set stream_large_bodies=1` to mitmproxy. """ flow.response.stream = True diff --git a/mitmproxy/addons/script.py b/mitmproxy/addons/script.py index e90dd8855..b4274f8c3 100644 --- a/mitmproxy/addons/script.py +++ b/mitmproxy/addons/script.py @@ -52,11 +52,19 @@ class Script: def tick(self): if time.time() - self.last_load > self.ReloadInterval: - mtime = os.stat(self.fullpath).st_mtime + try: + mtime = os.stat(self.fullpath).st_mtime + except FileNotFoundError: + scripts = ctx.options.scripts + scripts.remove(self.path) + ctx.options.update(scripts=scripts) + return + if mtime > self.last_mtime: ctx.log.info("Loading script: %s" % self.path) if self.ns: ctx.master.addons.remove(self.ns) + del sys.modules[self.ns.__name__] self.ns = load_script(ctx, self.fullpath) if self.ns: # We're already running, so we have to explicitly register and diff --git a/mitmproxy/addons/streambodies.py b/mitmproxy/addons/streambodies.py index 181f03371..c841075fb 100644 --- a/mitmproxy/addons/streambodies.py +++ b/mitmproxy/addons/streambodies.py @@ -28,12 +28,18 @@ class StreamBodies: if expected_size and not r.raw_content and not (0 <= expected_size <= self.max_size): # r.stream may already be a callable, which we want to preserve. r.stream = r.stream or True - # FIXME: make message generic when we add rquest streaming - ctx.log.info("Streaming response from %s" % f.request.host) + ctx.log.info("Streaming {} {}".format("response from" if not is_request else "request to", f.request.host)) - # FIXME! Request streaming doesn't work at the moment. def requestheaders(self, f): self.run(f, True) def responseheaders(self, f): self.run(f, False) + + def websocket_start(self, f): + if ctx.options.stream_websockets: + f.stream = True + ctx.log.info("Streaming WebSocket messages between {client} and {server}".format( + client=human.format_address(f.client_conn.address), + server=human.format_address(f.server_conn.address)) + ) diff --git a/mitmproxy/contentviews/protobuf.py b/mitmproxy/contentviews/protobuf.py index 4bbb15809..abd3985a6 100644 --- a/mitmproxy/contentviews/protobuf.py +++ b/mitmproxy/contentviews/protobuf.py @@ -1,6 +1,63 @@ -import subprocess +import io +from kaitaistruct import KaitaiStream from . import base +from mitmproxy.contrib.kaitaistruct import google_protobuf + + +def write_buf(out, field_tag, body, indent_level): + if body is not None: + out.write("{: <{level}}{}: {}\n".format('', field_tag, body if isinstance(body, int) else str(body, 'utf-8'), + level=indent_level)) + elif field_tag is not None: + out.write(' ' * indent_level + str(field_tag) + " {\n") + else: + out.write(' ' * indent_level + "}\n") + + +def format_pbuf(raw): + out = io.StringIO() + stack = [] + + try: + buf = google_protobuf.GoogleProtobuf(KaitaiStream(io.BytesIO(raw))) + except: + return False + stack.extend([(pair, 0) for pair in buf.pairs[::-1]]) + + while len(stack): + pair, indent_level = stack.pop() + + if pair.wire_type == pair.WireTypes.group_start: + body = None + elif pair.wire_type == pair.WireTypes.group_end: + body = None + pair._m_field_tag = None + elif pair.wire_type == pair.WireTypes.len_delimited: + body = pair.value.body + elif pair.wire_type == pair.WireTypes.varint: + body = pair.value.value + else: + body = pair.value + + try: + next_buf = google_protobuf.GoogleProtobuf(KaitaiStream(io.BytesIO(body))) + stack.extend([(pair, indent_level + 2) for pair in next_buf.pairs[::-1]]) + write_buf(out, pair.field_tag, None, indent_level) + except: + write_buf(out, pair.field_tag, body, indent_level) + + if stack: + prev_level = stack[-1][1] + else: + prev_level = 0 + + if prev_level < indent_level: + levels = int((indent_level - prev_level) / 2) + for i in range(1, levels + 1): + write_buf(out, None, None, indent_level - i * 2) + + return out.getvalue() class ViewProtobuf(base.View): @@ -15,28 +72,9 @@ class ViewProtobuf(base.View): "application/x-protobuffer", ] - def is_available(self): - try: - p = subprocess.Popen( - ["protoc", "--version"], - stdout=subprocess.PIPE - ) - out, _ = p.communicate() - return out.startswith(b"libprotoc") - except: - return False - def __call__(self, data, **metadata): - if not self.is_available(): - raise NotImplementedError("protoc not found. Please make sure 'protoc' is available in $PATH.") - - # if Popen raises OSError, it will be caught in - # get_content_view and fall back to Raw - p = subprocess.Popen(['protoc', '--decode_raw'], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - decoded, _ = p.communicate(input=data) + decoded = format_pbuf(data) if not decoded: raise ValueError("Failed to parse input.") + return "Protobuf", base.format_text(decoded) diff --git a/mitmproxy/contrib/kaitaistruct/google_protobuf.py b/mitmproxy/contrib/kaitaistruct/google_protobuf.py new file mode 100644 index 000000000..fe2336cc9 --- /dev/null +++ b/mitmproxy/contrib/kaitaistruct/google_protobuf.py @@ -0,0 +1,124 @@ +# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild + +from pkg_resources import parse_version +from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO +from enum import Enum + + +if parse_version(ks_version) < parse_version('0.7'): + raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version)) + +from .vlq_base128_le import VlqBase128Le +class GoogleProtobuf(KaitaiStruct): + """Google Protocol Buffers (AKA protobuf) is a popular data + serialization scheme used for communication protocols, data storage, + etc. There are implementations are available for almost every + popular language. The focus points of this scheme are brevity (data + is encoded in a very size-efficient manner) and extensibility (one + can add keys to the structure, while keeping it readable in previous + version of software). + + Protobuf uses semi-self-describing encoding scheme for its + messages. It means that it is possible to parse overall structure of + the message (skipping over fields one can't understand), but to + fully understand the message, one needs a protocol definition file + (`.proto`). To be specific: + + * "Keys" in key-value pairs provided in the message are identified + only with an integer "field tag". `.proto` file provides info on + which symbolic field names these field tags map to. + * "Keys" also provide something called "wire type". It's not a data + type in its common sense (i.e. you can't, for example, distinguish + `sint32` vs `uint32` vs some enum, or `string` from `bytes`), but + it's enough information to determine how many bytes to + parse. Interpretation of the value should be done according to the + type specified in `.proto` file. + * There's no direct information on which fields are optional / + required, which fields may be repeated or constitute a map, what + restrictions are placed on fields usage in a single message, what + are the fields' default values, etc, etc. + + .. seealso:: + Source - https://developers.google.com/protocol-buffers/docs/encoding + """ + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.pairs = [] + while not self._io.is_eof(): + self.pairs.append(self._root.Pair(self._io, self, self._root)) + + + class Pair(KaitaiStruct): + """Key-value pair.""" + + class WireTypes(Enum): + varint = 0 + bit_64 = 1 + len_delimited = 2 + group_start = 3 + group_end = 4 + bit_32 = 5 + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.key = VlqBase128Le(self._io) + _on = self.wire_type + if _on == self._root.Pair.WireTypes.varint: + self.value = VlqBase128Le(self._io) + elif _on == self._root.Pair.WireTypes.len_delimited: + self.value = self._root.DelimitedBytes(self._io, self, self._root) + elif _on == self._root.Pair.WireTypes.bit_64: + self.value = self._io.read_u8le() + elif _on == self._root.Pair.WireTypes.bit_32: + self.value = self._io.read_u4le() + + @property + def wire_type(self): + """"Wire type" is a part of the "key" that carries enough + information to parse value from the wire, i.e. read correct + amount of bytes, but there's not enough informaton to + interprete in unambiguously. For example, one can't clearly + distinguish 64-bit fixed-sized integers from 64-bit floats, + signed zigzag-encoded varints from regular unsigned varints, + arbitrary bytes from UTF-8 encoded strings, etc. + """ + if hasattr(self, '_m_wire_type'): + return self._m_wire_type if hasattr(self, '_m_wire_type') else None + + self._m_wire_type = self._root.Pair.WireTypes((self.key.value & 7)) + return self._m_wire_type if hasattr(self, '_m_wire_type') else None + + @property + def field_tag(self): + """Identifies a field of protocol. One can look up symbolic + field name in a `.proto` file by this field tag. + """ + if hasattr(self, '_m_field_tag'): + return self._m_field_tag if hasattr(self, '_m_field_tag') else None + + self._m_field_tag = (self.key.value >> 3) + return self._m_field_tag if hasattr(self, '_m_field_tag') else None + + + class DelimitedBytes(KaitaiStruct): + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.len = VlqBase128Le(self._io) + self.body = self._io.read_bytes(self.len.value) + + + diff --git a/mitmproxy/contrib/kaitaistruct/make.sh b/mitmproxy/contrib/kaitaistruct/make.sh index 789829cf6..0a30358aa 100755 --- a/mitmproxy/contrib/kaitaistruct/make.sh +++ b/mitmproxy/contrib/kaitaistruct/make.sh @@ -7,5 +7,7 @@ wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/jpeg.ksy wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/png.ksy wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/image/ico.ksy +wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/common/vlq_base128_le.ksy +wget -N https://raw.githubusercontent.com/kaitai-io/kaitai_struct_formats/master/serialization/google_protobuf.ksy kaitai-struct-compiler --target python --opaque-types=true *.ksy diff --git a/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py b/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py new file mode 100644 index 000000000..235759b7a --- /dev/null +++ b/mitmproxy/contrib/kaitaistruct/vlq_base128_le.py @@ -0,0 +1,94 @@ +# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild + +from pkg_resources import parse_version +from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO + + +if parse_version(ks_version) < parse_version('0.7'): + raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version)) + +class VlqBase128Le(KaitaiStruct): + """A variable-length unsigned integer using base128 encoding. 1-byte groups + consists of 1-bit flag of continuation and 7-bit value, and are ordered + "least significant group first", i.e. in "little-endian" manner. + + This particular encoding is specified and used in: + + * DWARF debug file format, where it's dubbed "unsigned LEB128" or "ULEB128". + http://dwarfstd.org/doc/dwarf-2.0.0.pdf - page 139 + * Google Protocol Buffers, where it's called "Base 128 Varints". + https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints + * Apache Lucene, where it's called "VInt" + http://lucene.apache.org/core/3_5_0/fileformats.html#VInt + * Apache Avro uses this as a basis for integer encoding, adding ZigZag on + top of it for signed ints + http://avro.apache.org/docs/current/spec.html#binary_encode_primitive + + More information on this encoding is available at https://en.wikipedia.org/wiki/LEB128 + + This particular implementation supports serialized values to up 8 bytes long. + """ + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.groups = [] + while True: + _ = self._root.Group(self._io, self, self._root) + self.groups.append(_) + if not (_.has_next): + break + + class Group(KaitaiStruct): + """One byte group, clearly divided into 7-bit "value" and 1-bit "has continuation + in the next byte" flag. + """ + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.b = self._io.read_u1() + + @property + def has_next(self): + """If true, then we have more bytes to read.""" + if hasattr(self, '_m_has_next'): + return self._m_has_next if hasattr(self, '_m_has_next') else None + + self._m_has_next = (self.b & 128) != 0 + return self._m_has_next if hasattr(self, '_m_has_next') else None + + @property + def value(self): + """The 7-bit (base128) numeric value of this group.""" + if hasattr(self, '_m_value'): + return self._m_value if hasattr(self, '_m_value') else None + + self._m_value = (self.b & 127) + return self._m_value if hasattr(self, '_m_value') else None + + + @property + def len(self): + if hasattr(self, '_m_len'): + return self._m_len if hasattr(self, '_m_len') else None + + self._m_len = len(self.groups) + return self._m_len if hasattr(self, '_m_len') else None + + @property + def value(self): + """Resulting value as normal integer.""" + if hasattr(self, '_m_value'): + return self._m_value if hasattr(self, '_m_value') else None + + self._m_value = (((((((self.groups[0].value + ((self.groups[1].value << 7) if self.len >= 2 else 0)) + ((self.groups[2].value << 14) if self.len >= 3 else 0)) + ((self.groups[3].value << 21) if self.len >= 4 else 0)) + ((self.groups[4].value << 28) if self.len >= 5 else 0)) + ((self.groups[5].value << 35) if self.len >= 6 else 0)) + ((self.groups[6].value << 42) if self.len >= 7 else 0)) + ((self.groups[7].value << 49) if self.len >= 8 else 0)) + return self._m_value if hasattr(self, '_m_value') else None + + diff --git a/mitmproxy/options.py b/mitmproxy/options.py index a38726793..e6c2fed67 100644 --- a/mitmproxy/options.py +++ b/mitmproxy/options.py @@ -154,6 +154,13 @@ class Options(optmanager.OptManager): Understands k/m/g suffixes, i.e. 3m for 3 megabytes. """ ) + self.add_option( + "stream_websockets", bool, False, + """ + Stream WebSocket messages between client and server. + Messages are captured and cannot be modified. + """ + ) self.add_option( "verbosity", int, 2, "Log verbosity." diff --git a/mitmproxy/proxy/protocol/http.py b/mitmproxy/proxy/protocol/http.py index 458708305..502280c14 100644 --- a/mitmproxy/proxy/protocol/http.py +++ b/mitmproxy/proxy/protocol/http.py @@ -273,7 +273,10 @@ class HttpLayer(base.Layer): self.send_response(http.expect_continue_response) request.headers.pop("expect") - request.data.content = b"".join(self.read_request_body(request)) + if f.request.stream: + f.request.data.content = None + else: + f.request.data.content = b"".join(self.read_request_body(request)) request.timestamp_end = time.time() except exceptions.HttpException as e: # We optimistically guess there might be an HTTP client on the @@ -326,12 +329,8 @@ class HttpLayer(base.Layer): f.request.scheme ) - def get_response(): - self.send_request(f.request) - f.response = self.read_response_headers() - try: - get_response() + self.send_request_headers(f.request) except exceptions.NetlibException as e: self.log( "server communication error: %s" % repr(e), @@ -357,7 +356,19 @@ class HttpLayer(base.Layer): self.disconnect() self.connect() - get_response() + self.send_request_headers(f.request) + + # This is taken out of the try except block because when streaming + # we can't send the request body while retrying as the generator gets exhausted + if f.request.stream: + chunks = self.read_request_body(f.request) + if callable(f.request.stream): + chunks = f.request.stream(chunks) + self.send_request_body(f.request, chunks) + else: + self.send_request_body(f.request, [f.request.data.content]) + + f.response = self.read_response_headers() # call the appropriate script hook - this is an opportunity for # an inline script to set f.stream = True diff --git a/mitmproxy/proxy/protocol/http1.py b/mitmproxy/proxy/protocol/http1.py index cafc26827..84cd63249 100644 --- a/mitmproxy/proxy/protocol/http1.py +++ b/mitmproxy/proxy/protocol/http1.py @@ -22,6 +22,16 @@ class Http1Layer(httpbase._HttpTransmissionLayer): self.config.options._processed.get("body_size_limit") ) + def send_request_headers(self, request): + headers = http1.assemble_request_head(request) + self.server_conn.wfile.write(headers) + self.server_conn.wfile.flush() + + def send_request_body(self, request, chunks): + for chunk in http1.assemble_body(request.headers, chunks): + self.server_conn.wfile.write(chunk) + self.server_conn.wfile.flush() + def send_request(self, request): self.server_conn.wfile.write(http1.assemble_request(request)) self.server_conn.wfile.flush() diff --git a/mitmproxy/proxy/protocol/http2.py b/mitmproxy/proxy/protocol/http2.py index ace7ecdea..eab5292f7 100644 --- a/mitmproxy/proxy/protocol/http2.py +++ b/mitmproxy/proxy/protocol/http2.py @@ -487,14 +487,23 @@ class Http2SingleStreamLayer(httpbase._HttpTransmissionLayer, basethread.BaseThr @detect_zombie_stream def read_request_body(self, request): - self.request_data_finished.wait() - data = [] - while self.request_data_queue.qsize() > 0: - data.append(self.request_data_queue.get()) - return data + if not request.stream: + self.request_data_finished.wait() + + while True: + try: + yield self.request_data_queue.get(timeout=0.1) + except queue.Empty: # pragma: no cover + pass + if self.request_data_finished.is_set(): + self.raise_zombie() + while self.request_data_queue.qsize() > 0: + yield self.request_data_queue.get() + break + self.raise_zombie() @detect_zombie_stream - def send_request(self, message): + def send_request_headers(self, request): if self.pushed: # nothing to do here return @@ -519,10 +528,10 @@ class Http2SingleStreamLayer(httpbase._HttpTransmissionLayer, basethread.BaseThr self.server_stream_id = self.connections[self.server_conn].get_next_available_stream_id() self.server_to_client_stream_ids[self.server_stream_id] = self.client_stream_id - headers = message.headers.copy() - headers.insert(0, ":path", message.path) - headers.insert(0, ":method", message.method) - headers.insert(0, ":scheme", message.scheme) + headers = request.headers.copy() + headers.insert(0, ":path", request.path) + headers.insert(0, ":method", request.method) + headers.insert(0, ":scheme", request.scheme) priority_exclusive = None priority_depends_on = None @@ -553,13 +562,24 @@ class Http2SingleStreamLayer(httpbase._HttpTransmissionLayer, basethread.BaseThr self.raise_zombie() self.connections[self.server_conn].lock.release() + @detect_zombie_stream + def send_request_body(self, request, chunks): + if self.pushed: + # nothing to do here + return + if not self.no_body: self.connections[self.server_conn].safe_send_body( self.raise_zombie, self.server_stream_id, - [message.content] + chunks ) + @detect_zombie_stream + def send_request(self, message): + self.send_request_headers(message) + self.send_request_body(message, [message.content]) + @detect_zombie_stream def read_response_headers(self): self.response_arrived.wait() diff --git a/mitmproxy/proxy/protocol/websocket.py b/mitmproxy/proxy/protocol/websocket.py index 373c6479d..19546eb2e 100644 --- a/mitmproxy/proxy/protocol/websocket.py +++ b/mitmproxy/proxy/protocol/websocket.py @@ -55,6 +55,7 @@ class WebSocketLayer(base.Layer): return self._handle_unknown_frame(frame, source_conn, other_conn, is_server) def _handle_data_frame(self, frame, source_conn, other_conn, is_server): + fb = self.server_frame_buffer if is_server else self.client_frame_buffer fb.append(frame) @@ -70,43 +71,51 @@ class WebSocketLayer(base.Layer): self.flow.messages.append(websocket_message) self.channel.ask("websocket_message", self.flow) - def get_chunk(payload): - if len(payload) == length: - # message has the same length, we can reuse the same sizes - pos = 0 - for s in original_chunk_sizes: - yield payload[pos:pos + s] - pos += s + if not self.flow.stream: + def get_chunk(payload): + if len(payload) == length: + # message has the same length, we can reuse the same sizes + pos = 0 + for s in original_chunk_sizes: + yield payload[pos:pos + s] + pos += s + else: + # just re-chunk everything into 4kB frames + # header len = 4 bytes without masking key and 8 bytes with masking key + chunk_size = 4092 if is_server else 4088 + chunks = range(0, len(payload), chunk_size) + for i in chunks: + yield payload[i:i + chunk_size] + + frms = [ + websockets.Frame( + payload=chunk, + opcode=frame.header.opcode, + mask=(False if is_server else 1), + masking_key=(b'' if is_server else os.urandom(4))) + for chunk in get_chunk(websocket_message.content) + ] + + if len(frms) > 0: + frms[-1].header.fin = True else: - # just re-chunk everything into 10kB frames - chunk_size = 10240 - chunks = range(0, len(payload), chunk_size) - for i in chunks: - yield payload[i:i + chunk_size] + frms.append(websockets.Frame( + fin=True, + opcode=websockets.OPCODE.CONTINUE, + mask=(False if is_server else 1), + masking_key=(b'' if is_server else os.urandom(4)))) - frms = [ - websockets.Frame( - payload=chunk, - opcode=frame.header.opcode, - mask=(False if is_server else 1), - masking_key=(b'' if is_server else os.urandom(4))) - for chunk in get_chunk(websocket_message.content) - ] + frms[0].header.opcode = message_type + frms[0].header.rsv1 = compressed_message + + for frm in frms: + other_conn.send(bytes(frm)) - if len(frms) > 0: - frms[-1].header.fin = True else: - frms.append(websockets.Frame( - fin=True, - opcode=websockets.OPCODE.CONTINUE, - mask=(False if is_server else 1), - masking_key=(b'' if is_server else os.urandom(4)))) + other_conn.send(bytes(frame)) - frms[0].header.opcode = message_type - frms[0].header.rsv1 = compressed_message - - for frm in frms: - other_conn.send(bytes(frm)) + elif self.flow.stream: + other_conn.send(bytes(frame)) return True diff --git a/mitmproxy/websocket.py b/mitmproxy/websocket.py index 30967a91f..ded09f655 100644 --- a/mitmproxy/websocket.py +++ b/mitmproxy/websocket.py @@ -45,6 +45,7 @@ class WebSocketFlow(flow.Flow): self.close_code = '(status code missing)' self.close_message = '(message missing)' self.close_reason = 'unknown status code' + self.stream = False if handshake_flow: self.client_key = websockets.get_client_key(handshake_flow.request.headers) diff --git a/setup.py b/setup.py index f3a42ac5f..212ad95ea 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ setup( "ldap3>=2.2.0, <2.3", "passlib>=1.6.5, <1.8", "pyasn1>=0.1.9, <0.3", - "pyOpenSSL>=16.0, <17.1", + "pyOpenSSL>=16.0,<17.2", "pyparsing>=2.1.3, <2.3", "pyperclip>=1.5.22, <1.6", "requests>=2.9.1, <3", @@ -98,14 +98,14 @@ setup( "pytest>=3.1, <4", "rstcheck>=2.2, <4.0", "sphinx_rtd_theme>=0.1.9, <0.3", - "sphinx-autobuild>=0.5.2, <0.7", + "sphinx-autobuild>=0.5.2, <0.8", "sphinx>=1.3.5, <1.7", "sphinxcontrib-documentedlist>=0.5.0, <0.7", "tox>=2.3, <3", ], 'examples': [ "beautifulsoup4>=4.4.1, <4.7", - "Pillow>=3.2, <4.2", + "Pillow>=3.2,<4.3", ] } ) diff --git a/test/mitmproxy/addons/test_script.py b/test/mitmproxy/addons/test_script.py index dd5349cb7..03b1f6208 100644 --- a/test/mitmproxy/addons/test_script.py +++ b/test/mitmproxy/addons/test_script.py @@ -1,6 +1,7 @@ import traceback import sys import time +import os import pytest from unittest import mock @@ -183,6 +184,20 @@ class TestScriptLoader: scripts = ["one", "one"] ) + def test_script_deletion(self): + tdir = tutils.test_data.path("mitmproxy/data/addonscripts/") + with open(tdir + "/dummy.py", 'w') as f: + f.write("\n") + with taddons.context() as tctx: + sl = script.ScriptLoader() + tctx.master.addons.add(sl) + tctx.configure(sl, scripts=[tutils.test_data.path("mitmproxy/data/addonscripts/dummy.py")]) + + os.remove(tutils.test_data.path("mitmproxy/data/addonscripts/dummy.py")) + tctx.invoke(sl, "tick") + assert not tctx.options.scripts + assert not sl.addons + def test_order(self): rec = tutils.test_data.path("mitmproxy/data/addonscripts/recorder") sc = script.ScriptLoader() diff --git a/test/mitmproxy/addons/test_streambodies.py b/test/mitmproxy/addons/test_streambodies.py index c6ce5e81c..426ec9ae0 100644 --- a/test/mitmproxy/addons/test_streambodies.py +++ b/test/mitmproxy/addons/test_streambodies.py @@ -29,3 +29,9 @@ def test_simple(): f = tflow.tflow(resp=True) f.response.headers["content-length"] = "invalid" tctx.cycle(sa, f) + + tctx.configure(sa, stream_websockets = True) + f = tflow.twebsocketflow() + assert not f.stream + sa.websocket_start(f) + assert f.stream diff --git a/test/mitmproxy/contentviews/test_protobuf.py b/test/mitmproxy/contentviews/test_protobuf.py index 71e515769..6c6e37f2b 100644 --- a/test/mitmproxy/contentviews/test_protobuf.py +++ b/test/mitmproxy/contentviews/test_protobuf.py @@ -1,52 +1,31 @@ -from unittest import mock import pytest from mitmproxy.contentviews import protobuf from mitmproxy.test import tutils from . import full_eval +data = tutils.test_data.push("mitmproxy/contentviews/test_protobuf_data/") + def test_view_protobuf_request(): v = full_eval(protobuf.ViewProtobuf()) - p = tutils.test_data.path("mitmproxy/data/protobuf01") + p = data.path("protobuf01") - with mock.patch('mitmproxy.contentviews.protobuf.ViewProtobuf.is_available'): - with mock.patch('subprocess.Popen') as n: - m = mock.Mock() - attrs = {'communicate.return_value': (b'1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"', True)} - m.configure_mock(**attrs) - n.return_value = m - - with open(p, "rb") as f: - data = f.read() - content_type, output = v(data) - assert content_type == "Protobuf" - assert output[0] == [('text', b'1: "3bbc333c-e61c-433b-819a-0b9a8cc103b8"')] - - m.communicate = mock.MagicMock() - m.communicate.return_value = (None, None) - with pytest.raises(ValueError, matches="Failed to parse input."): - v(b'foobar') + with open(p, "rb") as f: + raw = f.read() + content_type, output = v(raw) + assert content_type == "Protobuf" + assert output == [[('text', '1: 3bbc333c-e61c-433b-819a-0b9a8cc103b8')]] + with pytest.raises(ValueError, matches="Failed to parse input."): + v(b'foobar') -def test_view_protobuf_availability(): - with mock.patch('subprocess.Popen') as n: - m = mock.Mock() - attrs = {'communicate.return_value': (b'libprotoc fake version', True)} - m.configure_mock(**attrs) - n.return_value = m - assert protobuf.ViewProtobuf().is_available() +@pytest.mark.parametrize("filename", ["protobuf02", "protobuf03"]) +def test_format_pbuf(filename): + path = data.path(filename) + with open(path, "rb") as f: + input = f.read() + with open(path + "-decoded") as f: + expected = f.read() - m = mock.Mock() - attrs = {'communicate.return_value': (b'command not found', True)} - m.configure_mock(**attrs) - n.return_value = m - assert not protobuf.ViewProtobuf().is_available() - - -def test_view_protobuf_fallback(): - with mock.patch('subprocess.Popen.communicate') as m: - m.side_effect = OSError() - v = full_eval(protobuf.ViewProtobuf()) - with pytest.raises(NotImplementedError, matches='protoc not found'): - v(b'foobar') + assert protobuf.format_pbuf(input) == expected diff --git a/test/mitmproxy/data/protobuf01 b/test/mitmproxy/contentviews/test_protobuf_data/protobuf01 similarity index 100% rename from test/mitmproxy/data/protobuf01 rename to test/mitmproxy/contentviews/test_protobuf_data/protobuf01 diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf02 b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02 new file mode 100644 index 000000000..a47c45d51 Binary files /dev/null and b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02 differ diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf02-decoded b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02-decoded new file mode 100644 index 000000000..9be61e28e --- /dev/null +++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf02-decoded @@ -0,0 +1,65 @@ +1 { + 1: tpbuf + 4 { + 1: Person + 2 { + 1: name + 3: 1 + 4: 2 + 5: 9 + } + 2 { + 1: id + 3: 2 + 4: 2 + 5: 5 + } + 2 { + 1 { + 12: 1818845549 + } + 3: 3 + 4: 1 + 5: 9 + } + 2 { + 1: phone + 3: 4 + 4: 3 + 5: 11 + 6: .Person.PhoneNumber + } + 3 { + 1: PhoneNumber + 2 { + 1: number + 3: 1 + 4: 2 + 5: 9 + } + 2 { + 1: type + 3: 2 + 4: 1 + 5: 14 + 6: .Person.PhoneType + 7: HOME + } + } + 4 { + 1: PhoneType + 2 { + 1: MOBILE + 2: 0 + } + 2 { + 1: HOME + 2: 1 + } + 2 { + 1: WORK + 2: 2 + } + } + } +} diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf03 b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03 new file mode 100644 index 000000000..9fb230b3a --- /dev/null +++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03 @@ -0,0 +1 @@ +€ ð \ No newline at end of file diff --git a/test/mitmproxy/contentviews/test_protobuf_data/protobuf03-decoded b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03-decoded new file mode 100644 index 000000000..3d3392e16 --- /dev/null +++ b/test/mitmproxy/contentviews/test_protobuf_data/protobuf03-decoded @@ -0,0 +1,4 @@ +2 { +3: 3840 +4: 2160 +} diff --git a/test/mitmproxy/proxy/protocol/test_http1.py b/test/mitmproxy/proxy/protocol/test_http1.py index 1eff86666..4cca370c3 100644 --- a/test/mitmproxy/proxy/protocol/test_http1.py +++ b/test/mitmproxy/proxy/protocol/test_http1.py @@ -1,7 +1,6 @@ from unittest import mock import pytest -from mitmproxy import exceptions from mitmproxy.test import tflow from mitmproxy.net.http import http1 from mitmproxy.net.tcp import TCPClient @@ -108,9 +107,5 @@ class TestStreaming(tservers.HTTPProxyTest): r = p.request("post:'%s/p/200:b@10000'" % self.server.urlbase) assert len(r.content) == 10000 - if streaming: - with pytest.raises(exceptions.HttpReadDisconnect): # as the assertion in assert_write fails - # request with 10000 bytes - p.request("post:'%s/p/200':b@10000" % self.server.urlbase) - else: - assert p.request("post:'%s/p/200':b@10000" % self.server.urlbase) + # request with 10000 bytes + assert p.request("post:'%s/p/200':b@10000" % self.server.urlbase) diff --git a/test/mitmproxy/proxy/protocol/test_http2.py b/test/mitmproxy/proxy/protocol/test_http2.py index 261f8415c..487d88905 100644 --- a/test/mitmproxy/proxy/protocol/test_http2.py +++ b/test/mitmproxy/proxy/protocol/test_http2.py @@ -14,6 +14,7 @@ import mitmproxy.net from ...net import tservers as net_tservers from mitmproxy import exceptions from mitmproxy.net.http import http1, http2 +from pathod.language import generators from ... import tservers from ....conftest import requires_alpn @@ -166,7 +167,8 @@ class _Http2TestBase: end_stream=None, priority_exclusive=None, priority_depends_on=None, - priority_weight=None): + priority_weight=None, + streaming=False): if headers is None: headers = [] if end_stream is None: @@ -182,7 +184,8 @@ class _Http2TestBase: ) if body: h2_conn.send_data(stream_id, body) - h2_conn.end_stream(stream_id) + if not streaming: + h2_conn.end_stream(stream_id) wfile.write(h2_conn.data_to_send()) wfile.flush() @@ -862,3 +865,120 @@ class TestConnectionTerminated(_Http2Test): assert connection_terminated_event.error_code == 5 assert connection_terminated_event.last_stream_id == 42 assert connection_terminated_event.additional_data == b'foobar' + + +@requires_alpn +class TestRequestStreaming(_Http2Test): + + @classmethod + def handle_server_event(cls, event, h2_conn, rfile, wfile): + if isinstance(event, h2.events.ConnectionTerminated): + return False + elif isinstance(event, h2.events.DataReceived): + data = event.data + assert data + h2_conn.close_connection(error_code=5, last_stream_id=42, additional_data=data) + wfile.write(h2_conn.data_to_send()) + wfile.flush() + + return True + + @pytest.mark.parametrize('streaming', [True, False]) + def test_request_streaming(self, streaming): + class Stream: + def requestheaders(self, f): + f.request.stream = streaming + + self.master.addons.add(Stream()) + h2_conn = self.setup_connection() + body = generators.RandomGenerator("bytes", 100)[:] + self._send_request( + self.client.wfile, + h2_conn, + headers=[ + (':authority', "127.0.0.1:{}".format(self.server.server.address[1])), + (':method', 'GET'), + (':scheme', 'https'), + (':path', '/'), + + ], + body=body, + streaming=True + ) + done = False + connection_terminated_event = None + self.client.rfile.o.settimeout(2) + while not done: + try: + raw = b''.join(http2.read_raw_frame(self.client.rfile)) + events = h2_conn.receive_data(raw) + + for event in events: + if isinstance(event, h2.events.ConnectionTerminated): + connection_terminated_event = event + done = True + except: + break + + if streaming: + assert connection_terminated_event.additional_data == body + else: + assert connection_terminated_event is None + + +@requires_alpn +class TestResponseStreaming(_Http2Test): + + @classmethod + def handle_server_event(cls, event, h2_conn, rfile, wfile): + if isinstance(event, h2.events.ConnectionTerminated): + return False + elif isinstance(event, h2.events.RequestReceived): + data = generators.RandomGenerator("bytes", 100)[:] + h2_conn.send_headers(event.stream_id, [ + (':status', '200'), + ('content-length', '100') + ]) + h2_conn.send_data(event.stream_id, data) + wfile.write(h2_conn.data_to_send()) + wfile.flush() + return True + + @pytest.mark.parametrize('streaming', [True, False]) + def test_response_streaming(self, streaming): + class Stream: + def responseheaders(self, f): + f.response.stream = streaming + + self.master.addons.add(Stream()) + h2_conn = self.setup_connection() + self._send_request( + self.client.wfile, + h2_conn, + headers=[ + (':authority', "127.0.0.1:{}".format(self.server.server.address[1])), + (':method', 'GET'), + (':scheme', 'https'), + (':path', '/'), + + ] + ) + done = False + self.client.rfile.o.settimeout(2) + data = None + while not done: + try: + raw = b''.join(http2.read_raw_frame(self.client.rfile)) + events = h2_conn.receive_data(raw) + + for event in events: + if isinstance(event, h2.events.DataReceived): + data = event.data + done = True + except: + break + + if streaming: + assert data + else: + assert data is None diff --git a/test/mitmproxy/proxy/protocol/test_websocket.py b/test/mitmproxy/proxy/protocol/test_websocket.py index f78e173fc..58857f920 100644 --- a/test/mitmproxy/proxy/protocol/test_websocket.py +++ b/test/mitmproxy/proxy/protocol/test_websocket.py @@ -155,7 +155,13 @@ class TestSimple(_WebSocketTest): wfile.write(bytes(frame)) wfile.flush() - def test_simple(self): + @pytest.mark.parametrize('streaming', [True, False]) + def test_simple(self, streaming): + class Stream: + def websocket_start(self, f): + f.stream = streaming + + self.master.addons.add(Stream()) self.setup_connection() frame = websockets.Frame.from_file(self.client.rfile) @@ -328,3 +334,32 @@ class TestInvalidFrame(_WebSocketTest): frame = websockets.Frame.from_file(self.client.rfile) assert frame.header.opcode == 15 assert frame.payload == b'foobar' + + +class TestStreaming(_WebSocketTest): + + @classmethod + def handle_websockets(cls, rfile, wfile): + wfile.write(bytes(websockets.Frame(opcode=websockets.OPCODE.TEXT, payload=b'server-foobar'))) + wfile.flush() + + @pytest.mark.parametrize('streaming', [True, False]) + def test_streaming(self, streaming): + class Stream: + def websocket_start(self, f): + f.stream = streaming + + self.master.addons.add(Stream()) + self.setup_connection() + + frame = None + if not streaming: + with pytest.raises(exceptions.TcpDisconnect): # Reader.safe_read get nothing as result + frame = websockets.Frame.from_file(self.client.rfile) + assert frame is None + + else: + frame = websockets.Frame.from_file(self.client.rfile) + + assert frame + assert self.master.state.flows[1].messages == [] # Message not appended as the final frame isn't received diff --git a/test/mitmproxy/proxy/test_server.py b/test/mitmproxy/proxy/test_server.py index bd61f6004..4cae756a7 100644 --- a/test/mitmproxy/proxy/test_server.py +++ b/test/mitmproxy/proxy/test_server.py @@ -239,13 +239,28 @@ class TestHTTP(tservers.HTTPProxyTest, CommonMixin): p.request("get:'%s'" % response) def test_reconnect(self): - req = "get:'%s/p/200:b@1:da'" % self.server.urlbase + req = "get:'%s/p/200:b@1'" % self.server.urlbase p = self.pathoc() + + class MockOnce: + call = 0 + + def mock_once(self, http1obj, req): + self.call += 1 + if self.call == 1: + raise exceptions.TcpDisconnect + else: + headers = http1.assemble_request_head(req) + http1obj.server_conn.wfile.write(headers) + http1obj.server_conn.wfile.flush() + with p.connect(): - assert p.request(req) - # Server has disconnected. Mitmproxy should detect this, and reconnect. - assert p.request(req) - assert p.request(req) + with mock.patch("mitmproxy.proxy.protocol.http1.Http1Layer.send_request_headers", + side_effect=MockOnce().mock_once, autospec=True): + # Server disconnects while sending headers but mitmproxy reconnects + resp = p.request(req) + assert resp + assert resp.status_code == 200 def test_get_connection_switching(self): req = "get:'%s/p/200:b@1'" @@ -1072,6 +1087,23 @@ class TestProxyChainingSSLReconnect(tservers.HTTPUpstreamProxyTest): proxified to an upstream http proxy, we need to send the CONNECT request again. """ + + class MockOnce: + call = 0 + + def mock_once(self, http1obj, req): + self.call += 1 + + if self.call == 2: + headers = http1.assemble_request_head(req) + http1obj.server_conn.wfile.write(headers) + http1obj.server_conn.wfile.flush() + raise exceptions.TcpDisconnect + else: + headers = http1.assemble_request_head(req) + http1obj.server_conn.wfile.write(headers) + http1obj.server_conn.wfile.flush() + self.chain[0].tmaster.addons.add(RequestKiller([1, 2])) self.chain[1].tmaster.addons.add(RequestKiller([1])) @@ -1086,7 +1118,10 @@ class TestProxyChainingSSLReconnect(tservers.HTTPUpstreamProxyTest): assert len(self.chain[0].tmaster.state.flows) == 1 assert len(self.chain[1].tmaster.state.flows) == 1 - req = p.request("get:'/p/418:b\"content2\"'") + with mock.patch("mitmproxy.proxy.protocol.http1.Http1Layer.send_request_headers", + side_effect=MockOnce().mock_once, autospec=True): + req = p.request("get:'/p/418:b\"content2\"'") + assert req.status_code == 502 assert len(self.proxy.tmaster.state.flows) == 2 diff --git a/web/src/js/components/Modal/Option.jsx b/web/src/js/components/Modal/Option.jsx new file mode 100644 index 000000000..e2f6708f4 --- /dev/null +++ b/web/src/js/components/Modal/Option.jsx @@ -0,0 +1,138 @@ +import React, { Component } from "react" +import PropTypes from "prop-types" +import { connect } from "react-redux" +import { update as updateOptions } from "../../ducks/options" +import { Key } from "../../utils" + +const stopPropagation = e => { + if (e.keyCode !== Key.ESC) { + e.stopPropagation() + } +} + +BooleanOption.PropTypes = { + value: PropTypes.bool.isRequired, + onChange: PropTypes.func.isRequired, +} +function BooleanOption({ value, onChange, ...props }) { + return ( +