From a9b4560187df02c0d69e89a4892587a65bb03ea7 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Sat, 12 Nov 2016 18:28:37 +1300 Subject: [PATCH] Refine handling of HTTP CONNECT - CONNECT requests do not generate the usual http events. Instead, they generate the http_connect event and handlers then have the option of setting an error response to abort the connect. - The connect handler is called for both upstream proxy and regular proxy CONNECTs. --- docs/scripting/events.rst | 13 +++ mitmproxy/proxy/protocol/http.py | 136 +++++++++++++++++--------- test/mitmproxy/protocol/test_http1.py | 2 +- test/mitmproxy/test_server.py | 81 +++++---------- 4 files changed, 128 insertions(+), 104 deletions(-) diff --git a/docs/scripting/events.rst b/docs/scripting/events.rst index 622664853..a57214032 100644 --- a/docs/scripting/events.rst +++ b/docs/scripting/events.rst @@ -98,6 +98,19 @@ HTTP Events :widths: 40 60 :header-rows: 0 + * - .. py:function:: http_connect(flow) + + - Called when we receive an HTTP CONNECT request. Setting a non 2xx + response on the flow will return the response to the client abort the + connection. CONNECT requests and responses do not generate the usual + HTTP handler events. CONNECT requests are only valid in regular and + upstream proxy modes. + + *flow* + A ``models.HTTPFlow`` object. The flow is guaranteed to have + non-None ``request`` and ``requestheaders`` attributes. + + * - .. py:function:: request(flow) - Called when a client request has been received. diff --git a/mitmproxy/proxy/protocol/http.py b/mitmproxy/proxy/protocol/http.py index d99812f80..15f3f7cf0 100644 --- a/mitmproxy/proxy/protocol/http.py +++ b/mitmproxy/proxy/protocol/http.py @@ -114,6 +114,10 @@ class UpstreamConnectLayer(base.Layer): self.server_conn.address = address +def is_ok(status): + return 200 <= status < 300 + + class HTTPMode(enum.Enum): regular = 1 transparent = 2 @@ -168,43 +172,85 @@ class HttpLayer(base.Layer): if not self._process_flow(flow): return + def handle_regular_connect(self, f): + self.connect_request = True + + try: + self.set_server((f.request.host, f.request.port)) + except ( + exceptions.ProtocolException, exceptions.NetlibException + ) as e: + # HTTPS tasting means that ordinary errors like resolution + # and connection errors can happen here. + self.send_error_response(502, repr(e)) + f.error = flow.Error(str(e)) + self.channel.ask("error", f) + return False + + if f.response: + resp = f.response + else: + resp = http.make_connect_response(f.request.data.http_version) + + self.send_response(resp) + + if is_ok(resp.status_code): + layer = self.ctx.next_layer(self) + layer() + + return False + + def handle_upstream_connect(self, f): + self.establish_server_connection( + f.request.host, + f.request.port, + f.request.scheme + ) + self.send_request(f.request) + f.response = self.read_response_headers() + f.response.data.content = b"".join( + self.read_response_body(f.request, f.response) + ) + self.send_response(f.response) + if is_ok(f.response.status_code): + layer = UpstreamConnectLayer(self, f.request) + return layer() + return False + def _process_flow(self, f): try: try: request = self.read_request_headers(f) except exceptions.HttpReadDisconnect: - # don't throw an error for disconnects that happen before/between requests. - return False - - # Regular Proxy Mode: Handle CONNECT - if self.mode is HTTPMode.regular and request.first_line_format == "authority": - self.connect_request = True - # The standards are silent on what we should do with a CONNECT - # request body, so although it's not common, it's allowed. - request.data.content = b"".join(self.read_request_body(request)) - request.timestamp_end = time.time() - - self.channel.ask("http_connect", f) - - try: - self.set_server((request.host, request.port)) - except (exceptions.ProtocolException, exceptions.NetlibException) as e: - # HTTPS tasting means that ordinary errors like resolution and - # connection errors can happen here. - self.send_error_response(502, repr(e)) - f.error = flow.Error(str(e)) - self.channel.ask("error", f) - return False - self.send_response(http.make_connect_response(request.data.http_version)) - layer = self.ctx.next_layer(self) - layer() + # don't throw an error for disconnects that happen + # before/between requests. return False f.request = request + + if request.first_line_format == "authority": + # The standards are silent on what we should do with a CONNECT + # request body, so although it's not common, it's allowed. + f.request.data.content = b"".join( + self.read_request_body(f.request) + ) + f.request.timestamp_end = time.time() + self.channel.ask("http_connect", f) + + if self.mode is HTTPMode.regular: + return self.handle_regular_connect(f) + elif self.mode is HTTPMode.upstream: + return self.handle_upstream_connect(f) + else: + msg = "Unexpected CONNECT request." + self.send_error_response(400, msg) + raise exceptions.ProtocolException(msg) + self.channel.ask("requestheaders", f) if request.headers.get("expect", "").lower() == "100-continue": - # TODO: We may have to use send_response_headers for HTTP2 here. + # TODO: We may have to use send_response_headers for HTTP2 + # here. self.send_response(http.expect_continue_response) request.headers.pop("expect") @@ -222,10 +268,10 @@ class HttpLayer(base.Layer): self.log("request", "debug", [repr(request)]) - # Handle Proxy Authentication - # Proxy Authentication conceptually does not work in transparent mode. - # We catch this misconfiguration on startup. Here, we sort out requests - # after a successful CONNECT request (which do not need to be validated anymore) + # Handle Proxy Authentication Proxy Authentication conceptually does + # not work in transparent mode. We catch this misconfiguration on + # startup. Here, we sort out requests after a successful CONNECT + # request (which do not need to be validated anymore) if not self.connect_request and not self.authenticate(request): return False @@ -235,13 +281,14 @@ class HttpLayer(base.Layer): if self.config.options.mode == "reverse": f.request.headers["Host"] = self.config.upstream_server.address.host - # Determine .scheme, .host and .port attributes for inline scripts. - # For absolute-form requests, they are directly given in the request. - # For authority-form requests, we only need to determine the request scheme. - # For relative-form requests, we need to determine host and port as - # well. + # Determine .scheme, .host and .port attributes for inline scripts. For + # absolute-form requests, they are directly given in the request. For + # authority-form requests, we only need to determine the request + # scheme. For relative-form requests, we need to determine host and + # port as well. if self.mode is HTTPMode.transparent: - # Setting request.host also updates the host header, which we want to preserve + # Setting request.host also updates the host header, which we want + # to preserve host_header = f.request.headers.get("host", None) f.request.host = self.__initial_server_conn.address.host f.request.port = self.__initial_server_conn.address.port @@ -296,17 +343,16 @@ class HttpLayer(base.Layer): self.connect() get_response() - # call the appropriate script hook - this is an opportunity for an - # inline script to set f.stream = True + # call the appropriate script hook - this is an opportunity for + # an inline script to set f.stream = True self.channel.ask("responseheaders", f) if f.response.stream: f.response.data.content = None else: - f.response.data.content = b"".join(self.read_response_body( - f.request, - f.response - )) + f.response.data.content = b"".join( + self.read_response_body(f.request, f.response) + ) f.response.timestamp_end = time.time() # no further manipulation of self.server_conn beyond this point @@ -365,12 +411,6 @@ class HttpLayer(base.Layer): layer() return False # should never be reached - # Upstream Proxy Mode: Handle CONNECT - if f.request.first_line_format == "authority" and f.response.status_code == 200: - layer = UpstreamConnectLayer(self, f.request) - layer() - return False - except (exceptions.ProtocolException, exceptions.NetlibException) as e: self.send_error_response(502, repr(e)) if not f.response: diff --git a/test/mitmproxy/protocol/test_http1.py b/test/mitmproxy/protocol/test_http1.py index 5026bef18..cd937adad 100644 --- a/test/mitmproxy/protocol/test_http1.py +++ b/test/mitmproxy/protocol/test_http1.py @@ -20,7 +20,7 @@ class TestInvalidRequests(tservers.HTTPProxyTest): with p.connect(): r = p.request("connect:'%s:%s'" % ("127.0.0.1", self.server2.port)) assert r.status_code == 400 - assert b"Invalid HTTP request form" in r.content + assert b"Unexpected CONNECT" in r.content def test_relative_request(self): p = self.pathoc_raw() diff --git a/test/mitmproxy/test_server.py b/test/mitmproxy/test_server.py index 9fa6ed06b..dab47c9c2 100644 --- a/test/mitmproxy/test_server.py +++ b/test/mitmproxy/test_server.py @@ -50,10 +50,7 @@ class CommonMixin: def test_replay(self): assert self.pathod("304").status_code == 304 - if isinstance(self, tservers.HTTPUpstreamProxyTest) and self.ssl: - assert len(self.master.state.flows) == 2 - else: - assert len(self.master.state.flows) == 1 + assert len(self.master.state.flows) == 1 l = self.master.state.flows[-1] assert l.response.status_code == 304 l.request.path = "/p/305" @@ -952,10 +949,10 @@ class TestUpstreamProxySSL( assert req.status_code == 418 # CONNECT from pathoc to chain[0], - assert self.proxy.tmaster.state.flow_count() == 2 + assert self.proxy.tmaster.state.flow_count() == 1 # request from pathoc to chain[0] # CONNECT from proxy to chain[1], - assert self.chain[0].tmaster.state.flow_count() == 2 + assert self.chain[0].tmaster.state.flow_count() == 1 # request from proxy to chain[1] # request from chain[0] (regular proxy doesn't store CONNECTs) assert self.chain[1].tmaster.state.flow_count() == 1 @@ -978,21 +975,12 @@ class TestProxyChainingSSLReconnect(tservers.HTTPUpstreamProxyTest): def test_reconnect(self): """ Tests proper functionality of ConnectionHandler.server_reconnect mock. - If we have a disconnect on a secure connection that's transparently proxified to - an upstream http proxy, we need to send the CONNECT request again. + If we have a disconnect on a secure connection that's transparently + proxified to an upstream http proxy, we need to send the CONNECT + request again. """ - self.chain[1].tmaster.addons.add( - RequestKiller([2]) - ) - self.chain[0].tmaster.addons.add( - RequestKiller( - [ - 1, # CONNECT - 3, # reCONNECT - 4 # request - ] - ) - ) + self.chain[0].tmaster.addons.add(RequestKiller([1, 2])) + self.chain[1].tmaster.addons.add(RequestKiller([1])) p = self.pathoc() with p.connect(): @@ -1000,44 +988,27 @@ class TestProxyChainingSSLReconnect(tservers.HTTPUpstreamProxyTest): assert req.content == b"content" assert req.status_code == 418 - assert self.proxy.tmaster.state.flow_count() == 2 # CONNECT and request - # CONNECT, failing request, - assert self.chain[0].tmaster.state.flow_count() == 4 - # reCONNECT, request - # failing request, request - assert self.chain[1].tmaster.state.flow_count() == 2 - # (doesn't store (repeated) CONNECTs from chain[0] - # as it is a regular proxy) - - assert not self.chain[1].tmaster.state.flows[0].response # killed - assert self.chain[1].tmaster.state.flows[1].response - - assert self.proxy.tmaster.state.flows[0].request.first_line_format == "authority" - assert self.proxy.tmaster.state.flows[1].request.first_line_format == "relative" - - assert self.chain[0].tmaster.state.flows[ - 0].request.first_line_format == "authority" - assert self.chain[0].tmaster.state.flows[ - 1].request.first_line_format == "relative" - assert self.chain[0].tmaster.state.flows[ - 2].request.first_line_format == "authority" - assert self.chain[0].tmaster.state.flows[ - 3].request.first_line_format == "relative" - - assert self.chain[1].tmaster.state.flows[ - 0].request.first_line_format == "relative" - assert self.chain[1].tmaster.state.flows[ - 1].request.first_line_format == "relative" + # First request goes through all three proxies exactly once + assert self.proxy.tmaster.state.flow_count() == 1 + assert self.chain[0].tmaster.state.flow_count() == 1 + assert self.chain[1].tmaster.state.flow_count() == 1 req = p.request("get:'/p/418:b\"content2\"'") - assert req.status_code == 502 - assert self.proxy.tmaster.state.flow_count() == 3 # + new request - # + new request, repeated CONNECT from chain[1] - assert self.chain[0].tmaster.state.flow_count() == 6 - # (both terminated) - # nothing happened here - assert self.chain[1].tmaster.state.flow_count() == 2 + + assert self.proxy.tmaster.state.flow_count() == 2 + assert self.chain[0].tmaster.state.flow_count() == 2 + # Upstream sees two requests due to reconnection attempt + assert self.chain[1].tmaster.state.flow_count() == 3 + assert not self.chain[1].tmaster.state.flows[-1].response + assert not self.chain[1].tmaster.state.flows[-2].response + + # Reconnection failed, so we're now disconnected + tutils.raises( + exceptions.HttpException, + p.request, + "get:'/p/418:b\"content3\"'" + ) class AddUpstreamCertsToClientChainMixin: