Refine handling of HTTP CONNECT

- CONNECT requests do not generate the usual http events. Instead, they
generate the http_connect event and handlers then have the option of setting an
error response to abort the connect.
- The connect handler is called for both upstream proxy and regular proxy CONNECTs.
This commit is contained in:
Aldo Cortesi 2016-11-12 18:28:37 +13:00
parent 38f8d9e541
commit a9b4560187
4 changed files with 128 additions and 104 deletions

View File

@ -98,6 +98,19 @@ HTTP Events
:widths: 40 60 :widths: 40 60
:header-rows: 0 :header-rows: 0
* - .. py:function:: http_connect(flow)
- Called when we receive an HTTP CONNECT request. Setting a non 2xx
response on the flow will return the response to the client and abort the
connection. CONNECT requests and responses do not generate the usual
HTTP handler events. CONNECT requests are only valid in regular and
upstream proxy modes.
*flow*
A ``models.HTTPFlow`` object. The flow is guaranteed to have
non-None ``request`` and ``requestheaders`` attributes.
* - .. py:function:: request(flow) * - .. py:function:: request(flow)
- Called when a client request has been received. - Called when a client request has been received.

View File

@ -114,6 +114,10 @@ class UpstreamConnectLayer(base.Layer):
self.server_conn.address = address self.server_conn.address = address
def is_ok(status):
    """
    Return True when *status* lies in the 2xx range, i.e. 200 up to but
    not including 300.
    """
    return status >= 200 and status < 300
class HTTPMode(enum.Enum): class HTTPMode(enum.Enum):
regular = 1 regular = 1
transparent = 2 transparent = 2
@ -168,43 +172,85 @@ class HttpLayer(base.Layer):
if not self._process_flow(flow): if not self._process_flow(flow):
return return
def handle_regular_connect(self, f):
    """
    Serve a CONNECT request received in regular proxy mode.

    Flags the layer as having seen a CONNECT, calls ``set_server`` with
    the host and port from the request, and sends a response to the
    client: the response already present on the flow if there is one,
    otherwise a freshly made CONNECT response. If the response that was
    sent has a 2xx status, the next layer is obtained from the context
    and run.

    Always returns False.
    """
    self.connect_request = True

    try:
        self.set_server((f.request.host, f.request.port))
    except (
        exceptions.ProtocolException, exceptions.NetlibException
    ) as err:
        # HTTPS tasting means that ordinary errors such as resolution
        # and connection failures can happen here.
        self.send_error_response(502, repr(err))
        f.error = flow.Error(str(err))
        self.channel.ask("error", f)
        return False

    # Prefer a response that is already set on the flow; fall back to
    # the stock CONNECT response otherwise.
    reply = f.response or http.make_connect_response(
        f.request.data.http_version
    )
    self.send_response(reply)

    if not is_ok(reply.status_code):
        return False

    next_layer = self.ctx.next_layer(self)
    next_layer()
    return False
def handle_upstream_connect(self, f):
    """
    Serve a CONNECT request received in upstream proxy mode.

    If the flow already carries a response (for example one set by an
    ``http_connect`` handler), that response is sent to the client as-is
    and the upstream server is not contacted. Otherwise the CONNECT
    request is forwarded upstream and the upstream response, including
    its body, is read into ``f.response`` and relayed to the client.

    When the response that was sent has a 2xx status, an
    ``UpstreamConnectLayer`` is created and run, and its result is
    returned. In every other case False is returned.
    """
    # Only talk to the upstream proxy when nothing has set a response
    # yet; unconditionally overwriting f.response would silently discard
    # a response set by a handler to abort the CONNECT.
    if not f.response:
        self.establish_server_connection(
            f.request.host,
            f.request.port,
            f.request.scheme
        )
        self.send_request(f.request)
        f.response = self.read_response_headers()
        f.response.data.content = b"".join(
            self.read_response_body(f.request, f.response)
        )
    self.send_response(f.response)
    if is_ok(f.response.status_code):
        layer = UpstreamConnectLayer(self, f.request)
        return layer()
    return False
def _process_flow(self, f): def _process_flow(self, f):
try: try:
try: try:
request = self.read_request_headers(f) request = self.read_request_headers(f)
except exceptions.HttpReadDisconnect: except exceptions.HttpReadDisconnect:
# don't throw an error for disconnects that happen before/between requests. # don't throw an error for disconnects that happen
return False # before/between requests.
# Regular Proxy Mode: Handle CONNECT
if self.mode is HTTPMode.regular and request.first_line_format == "authority":
self.connect_request = True
# The standards are silent on what we should do with a CONNECT
# request body, so although it's not common, it's allowed.
request.data.content = b"".join(self.read_request_body(request))
request.timestamp_end = time.time()
self.channel.ask("http_connect", f)
try:
self.set_server((request.host, request.port))
except (exceptions.ProtocolException, exceptions.NetlibException) as e:
# HTTPS tasting means that ordinary errors like resolution and
# connection errors can happen here.
self.send_error_response(502, repr(e))
f.error = flow.Error(str(e))
self.channel.ask("error", f)
return False
self.send_response(http.make_connect_response(request.data.http_version))
layer = self.ctx.next_layer(self)
layer()
return False return False
f.request = request f.request = request
if request.first_line_format == "authority":
# The standards are silent on what we should do with a CONNECT
# request body, so although it's not common, it's allowed.
f.request.data.content = b"".join(
self.read_request_body(f.request)
)
f.request.timestamp_end = time.time()
self.channel.ask("http_connect", f)
if self.mode is HTTPMode.regular:
return self.handle_regular_connect(f)
elif self.mode is HTTPMode.upstream:
return self.handle_upstream_connect(f)
else:
msg = "Unexpected CONNECT request."
self.send_error_response(400, msg)
raise exceptions.ProtocolException(msg)
self.channel.ask("requestheaders", f) self.channel.ask("requestheaders", f)
if request.headers.get("expect", "").lower() == "100-continue": if request.headers.get("expect", "").lower() == "100-continue":
# TODO: We may have to use send_response_headers for HTTP2 here. # TODO: We may have to use send_response_headers for HTTP2
# here.
self.send_response(http.expect_continue_response) self.send_response(http.expect_continue_response)
request.headers.pop("expect") request.headers.pop("expect")
@ -222,10 +268,10 @@ class HttpLayer(base.Layer):
self.log("request", "debug", [repr(request)]) self.log("request", "debug", [repr(request)])
# Handle Proxy Authentication # Handle Proxy Authentication Proxy Authentication conceptually does
# Proxy Authentication conceptually does not work in transparent mode. # not work in transparent mode. We catch this misconfiguration on
# We catch this misconfiguration on startup. Here, we sort out requests # startup. Here, we sort out requests after a successful CONNECT
# after a successful CONNECT request (which do not need to be validated anymore) # request (which do not need to be validated anymore)
if not self.connect_request and not self.authenticate(request): if not self.connect_request and not self.authenticate(request):
return False return False
@ -235,13 +281,14 @@ class HttpLayer(base.Layer):
if self.config.options.mode == "reverse": if self.config.options.mode == "reverse":
f.request.headers["Host"] = self.config.upstream_server.address.host f.request.headers["Host"] = self.config.upstream_server.address.host
# Determine .scheme, .host and .port attributes for inline scripts. # Determine .scheme, .host and .port attributes for inline scripts. For
# For absolute-form requests, they are directly given in the request. # absolute-form requests, they are directly given in the request. For
# For authority-form requests, we only need to determine the request scheme. # authority-form requests, we only need to determine the request
# For relative-form requests, we need to determine host and port as # scheme. For relative-form requests, we need to determine host and
# well. # port as well.
if self.mode is HTTPMode.transparent: if self.mode is HTTPMode.transparent:
# Setting request.host also updates the host header, which we want to preserve # Setting request.host also updates the host header, which we want
# to preserve
host_header = f.request.headers.get("host", None) host_header = f.request.headers.get("host", None)
f.request.host = self.__initial_server_conn.address.host f.request.host = self.__initial_server_conn.address.host
f.request.port = self.__initial_server_conn.address.port f.request.port = self.__initial_server_conn.address.port
@ -296,17 +343,16 @@ class HttpLayer(base.Layer):
self.connect() self.connect()
get_response() get_response()
# call the appropriate script hook - this is an opportunity for an # call the appropriate script hook - this is an opportunity for
# inline script to set f.stream = True # an inline script to set f.stream = True
self.channel.ask("responseheaders", f) self.channel.ask("responseheaders", f)
if f.response.stream: if f.response.stream:
f.response.data.content = None f.response.data.content = None
else: else:
f.response.data.content = b"".join(self.read_response_body( f.response.data.content = b"".join(
f.request, self.read_response_body(f.request, f.response)
f.response )
))
f.response.timestamp_end = time.time() f.response.timestamp_end = time.time()
# no further manipulation of self.server_conn beyond this point # no further manipulation of self.server_conn beyond this point
@ -365,12 +411,6 @@ class HttpLayer(base.Layer):
layer() layer()
return False # should never be reached return False # should never be reached
# Upstream Proxy Mode: Handle CONNECT
if f.request.first_line_format == "authority" and f.response.status_code == 200:
layer = UpstreamConnectLayer(self, f.request)
layer()
return False
except (exceptions.ProtocolException, exceptions.NetlibException) as e: except (exceptions.ProtocolException, exceptions.NetlibException) as e:
self.send_error_response(502, repr(e)) self.send_error_response(502, repr(e))
if not f.response: if not f.response:

View File

@ -20,7 +20,7 @@ class TestInvalidRequests(tservers.HTTPProxyTest):
with p.connect(): with p.connect():
r = p.request("connect:'%s:%s'" % ("127.0.0.1", self.server2.port)) r = p.request("connect:'%s:%s'" % ("127.0.0.1", self.server2.port))
assert r.status_code == 400 assert r.status_code == 400
assert b"Invalid HTTP request form" in r.content assert b"Unexpected CONNECT" in r.content
def test_relative_request(self): def test_relative_request(self):
p = self.pathoc_raw() p = self.pathoc_raw()

View File

@ -50,10 +50,7 @@ class CommonMixin:
def test_replay(self): def test_replay(self):
assert self.pathod("304").status_code == 304 assert self.pathod("304").status_code == 304
if isinstance(self, tservers.HTTPUpstreamProxyTest) and self.ssl: assert len(self.master.state.flows) == 1
assert len(self.master.state.flows) == 2
else:
assert len(self.master.state.flows) == 1
l = self.master.state.flows[-1] l = self.master.state.flows[-1]
assert l.response.status_code == 304 assert l.response.status_code == 304
l.request.path = "/p/305" l.request.path = "/p/305"
@ -952,10 +949,10 @@ class TestUpstreamProxySSL(
assert req.status_code == 418 assert req.status_code == 418
# CONNECT from pathoc to chain[0], # CONNECT from pathoc to chain[0],
assert self.proxy.tmaster.state.flow_count() == 2 assert self.proxy.tmaster.state.flow_count() == 1
# request from pathoc to chain[0] # request from pathoc to chain[0]
# CONNECT from proxy to chain[1], # CONNECT from proxy to chain[1],
assert self.chain[0].tmaster.state.flow_count() == 2 assert self.chain[0].tmaster.state.flow_count() == 1
# request from proxy to chain[1] # request from proxy to chain[1]
# request from chain[0] (regular proxy doesn't store CONNECTs) # request from chain[0] (regular proxy doesn't store CONNECTs)
assert self.chain[1].tmaster.state.flow_count() == 1 assert self.chain[1].tmaster.state.flow_count() == 1
@ -978,21 +975,12 @@ class TestProxyChainingSSLReconnect(tservers.HTTPUpstreamProxyTest):
def test_reconnect(self): def test_reconnect(self):
""" """
Tests proper functionality of ConnectionHandler.server_reconnect mock. Tests proper functionality of ConnectionHandler.server_reconnect mock.
If we have a disconnect on a secure connection that's transparently proxified to If we have a disconnect on a secure connection that's transparently
an upstream http proxy, we need to send the CONNECT request again. proxified to an upstream http proxy, we need to send the CONNECT
request again.
""" """
self.chain[1].tmaster.addons.add( self.chain[0].tmaster.addons.add(RequestKiller([1, 2]))
RequestKiller([2]) self.chain[1].tmaster.addons.add(RequestKiller([1]))
)
self.chain[0].tmaster.addons.add(
RequestKiller(
[
1, # CONNECT
3, # reCONNECT
4 # request
]
)
)
p = self.pathoc() p = self.pathoc()
with p.connect(): with p.connect():
@ -1000,44 +988,27 @@ class TestProxyChainingSSLReconnect(tservers.HTTPUpstreamProxyTest):
assert req.content == b"content" assert req.content == b"content"
assert req.status_code == 418 assert req.status_code == 418
assert self.proxy.tmaster.state.flow_count() == 2 # CONNECT and request # First request goes through all three proxies exactly once
# CONNECT, failing request, assert self.proxy.tmaster.state.flow_count() == 1
assert self.chain[0].tmaster.state.flow_count() == 4 assert self.chain[0].tmaster.state.flow_count() == 1
# reCONNECT, request assert self.chain[1].tmaster.state.flow_count() == 1
# failing request, request
assert self.chain[1].tmaster.state.flow_count() == 2
# (doesn't store (repeated) CONNECTs from chain[0]
# as it is a regular proxy)
assert not self.chain[1].tmaster.state.flows[0].response # killed
assert self.chain[1].tmaster.state.flows[1].response
assert self.proxy.tmaster.state.flows[0].request.first_line_format == "authority"
assert self.proxy.tmaster.state.flows[1].request.first_line_format == "relative"
assert self.chain[0].tmaster.state.flows[
0].request.first_line_format == "authority"
assert self.chain[0].tmaster.state.flows[
1].request.first_line_format == "relative"
assert self.chain[0].tmaster.state.flows[
2].request.first_line_format == "authority"
assert self.chain[0].tmaster.state.flows[
3].request.first_line_format == "relative"
assert self.chain[1].tmaster.state.flows[
0].request.first_line_format == "relative"
assert self.chain[1].tmaster.state.flows[
1].request.first_line_format == "relative"
req = p.request("get:'/p/418:b\"content2\"'") req = p.request("get:'/p/418:b\"content2\"'")
assert req.status_code == 502 assert req.status_code == 502
assert self.proxy.tmaster.state.flow_count() == 3 # + new request
# + new request, repeated CONNECT from chain[1] assert self.proxy.tmaster.state.flow_count() == 2
assert self.chain[0].tmaster.state.flow_count() == 6 assert self.chain[0].tmaster.state.flow_count() == 2
# (both terminated) # Upstream sees two requests due to reconnection attempt
# nothing happened here assert self.chain[1].tmaster.state.flow_count() == 3
assert self.chain[1].tmaster.state.flow_count() == 2 assert not self.chain[1].tmaster.state.flows[-1].response
assert not self.chain[1].tmaster.state.flows[-2].response
# Reconnection failed, so we're now disconnected
tutils.raises(
exceptions.HttpException,
p.request,
"get:'/p/418:b\"content3\"'"
)
class AddUpstreamCertsToClientChainMixin: class AddUpstreamCertsToClientChainMixin: