[sans-io] improve next layer heuristics

This commit is contained in:
Maximilian Hils 2019-11-11 18:27:56 +01:00
parent 105cac231d
commit 0c04638d8d

View File

@ -1,12 +1,12 @@
import re
import typing import typing
from mitmproxy import ctx from mitmproxy import ctx, exceptions
from mitmproxy.net.tls import is_tls_record_magic from mitmproxy.net.tls import is_tls_record_magic
from mitmproxy.proxy.config import HostMatcher
from mitmproxy.proxy.protocol.http import HTTPMode from mitmproxy.proxy.protocol.http import HTTPMode
from mitmproxy.proxy2 import context, layer, layers from mitmproxy.proxy2 import context, layer, layers
from mitmproxy.proxy2.layers import modes from mitmproxy.proxy2.layers import modes
from mitmproxy.proxy2.layers.glue import GLUE_DEBUG from mitmproxy.proxy2.layers.tls import HTTP_ALPNS, parse_client_hello
LayerCls = typing.Type[layer.Layer] LayerCls = typing.Type[layer.Layer]
@ -23,93 +23,139 @@ def stack_match(
) )
class HostMatcher:
    """Match connection addresses against a set of regular expressions.

    Every pattern is compiled case-insensitively once, at construction time.
    Calling the matcher with an address formats it as ``"host:port"`` and
    reports whether any pattern is found anywhere in that string.
    """

    def __init__(self, patterns: typing.Iterable[str] = tuple()):
        # Snapshot the patterns before compiling. A one-shot iterable (generator,
        # iterator) would be exhausted by the compile step, and an exhausted
        # generator object is still truthy, so __bool__ would claim that an
        # empty matcher has patterns.
        self.patterns = tuple(patterns)
        self.regexes = [re.compile(p, re.IGNORECASE) for p in self.patterns]

    def __call__(self, address):
        """Return True if *address* matches any pattern.

        *address* is indexed as ``address[0]`` (host) and ``address[1]`` (port);
        a falsy address (e.g. ``None``) never matches.
        """
        if not address:
            return False
        host = f"{address[0]}:{address[1]}"
        return any(rex.search(host) for rex in self.regexes)

    def __bool__(self):
        """A matcher is truthy exactly when it holds at least one pattern."""
        return bool(self.patterns)
class NextLayer:
ignore_hosts: typing.Iterable[re.Pattern] = ()
allow_hosts: typing.Iterable[re.Pattern] = ()
tcp_hosts: typing.Iterable[re.Pattern] = ()
def configure(self, updated):
    """Rebuild the cached host patterns for the options named in *updated*.

    ``tcp_hosts`` is recompiled when it changed. ``ignore_hosts`` and
    ``allow_hosts`` are always recompiled together when either one changed.

    Raises:
        exceptions.OptionsError: if both ``allow_hosts`` and ``ignore_hosts``
            are set at the same time.
    """

    def compile_all(patterns):
        # All host patterns are matched case-insensitively.
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    if "tcp_hosts" in updated:
        self.tcp_hosts = compile_all(ctx.options.tcp_hosts)

    if "allow_hosts" not in updated and "ignore_hosts" not in updated:
        return

    if ctx.options.allow_hosts and ctx.options.ignore_hosts:
        raise exceptions.OptionsError("The allow_hosts and ignore_hosts options are mutually exclusive.")
    self.ignore_hosts = compile_all(ctx.options.ignore_hosts)
    self.allow_hosts = compile_all(ctx.options.allow_hosts)
def ignore_connection(self, context: context.Context, data_client: bytes) -> typing.Optional[bool]:
    """Decide whether this connection should bypass interception.

    The candidate names are the server address and, if *data_client* starts
    with a TLS record, the SNI taken from the ClientHello.

    Returns:
        ``False`` if neither ``ignore_hosts`` nor ``allow_hosts`` is set.
        With ``ignore_hosts`` set: ``True`` if any candidate matches a pattern.
        With ``allow_hosts`` set: ``True`` if no candidate matches a pattern.
        ``None`` if the ClientHello could not be parsed yet; the caller should
        ask again once more client data has arrived.
    """
    if not ctx.options.ignore_hosts and not ctx.options.allow_hosts:
        return False

    candidates: typing.List[str] = []

    address = context.server.address
    if address:
        # re.search() needs a string. The address is indexed as (host, port)
        # elsewhere in this file, so render it as "host:port".
        # NOTE(review): assumes a non-str address is a (host, port) pair - confirm.
        if not isinstance(address, str):
            address = f"{address[0]}:{address[1]}"
        candidates.append(address)

    if is_tls_record_magic(data_client):
        try:
            sni = parse_client_hello(data_client).sni
        except ValueError:
            return None  # defer decision, wait for more input data
        # NOTE(review): a ClientHello without an SNI extension presumably yields
        # a falsy value here; skip it instead of crashing on .decode().
        if sni:
            candidates.append(sni.decode("idna"))

    if ctx.options.ignore_hosts:
        return any(
            re.search(rex, candidate, re.IGNORECASE)
            for candidate in candidates
            for rex in ctx.options.ignore_hosts
        )
    # Only allow_hosts can be set at this point: ignore everything not listed.
    return not any(
        re.search(rex, candidate, re.IGNORECASE)
        for candidate in candidates
        for rex in ctx.options.allow_hosts
    )
def next_layer(self, nextlayer: layer.NextLayer):
    """Choose the layer that replaces *nextlayer* and store it on ``nextlayer.layer``.

    The decision is delegated to ``_next_layer``, which receives the
    connection context and the client data reported by ``data_client()``.
    The stored value may be ``None`` when no decision can be made yet.
    """
    conn_context = nextlayer.context
    buffered = nextlayer.data_client()
    nextlayer.layer = self._next_layer(conn_context, buffered)
def _next_layer(self, context: context.Context, data_client: bytes) -> typing.Optional[layer.Layer]:
    """Pick the next protocol layer for a connection.

    Args:
        context: the connection context; ``context.layers`` is the stack built so far.
        data_client: the bytes received from the client so far.

    Returns:
        The layer to insert next, or ``None`` if more client data is needed
        before a decision can be made.

    Raises:
        NotImplementedError: for the upstream-proxy case, which is not handled yet.
    """
    if len(context.layers) == 0:
        return self.make_top_layer(context)

    if len(context.layers) == 1:
        return layers.ServerTLSLayer(context)

    if len(data_client) < 3:
        # Not enough data for the TLS magic / HTTP verb heuristics below.
        return None

    client_tls = is_tls_record_magic(data_client)
    top_layer = context.layers[-1]

    def s(*expected):
        # Shorthand: does the current layer stack match the expected layer types?
        return stack_match(context, expected)

    # 1. check for --ignore/--allow
    ignore = self.ignore_connection(context, data_client)
    if ignore is True:
        return layers.TCPLayer(context, ignore=True)
    if ignore is None:
        return None

    # 2. Check for TLS
    if client_tls:
        # client tls requires a server tls layer as parent layer
        if isinstance(top_layer, layers.ServerTLSLayer):
            return layers.ClientTLSLayer(context)
        # A "Secure Web Proxy" (https://www.chromium.org/developers/design-documents/secure-web-proxy)
        # is the HttpProxy case and does not imply TLS on the server side.
        # In all other cases, client TLS implies TLS for both ends.
        # (The condition used to be inverted relative to these comments.)
        if not s(modes.HttpProxy):
            context.server.tls = True
        return layers.ServerTLSLayer(context)

    # 3. Setup the HTTP layer for a regular HTTP proxy or an upstream proxy.
    if (
        s(modes.HttpProxy, layers.ServerTLSLayer)
        or s(modes.HttpProxy, layers.ServerTLSLayer, layers.ClientTLSLayer)
    ):
        return layers.HTTPLayer(context, HTTPMode.regular)
    if (
        ctx.options.mode.startswith("upstream:")
        and len(context.layers) <= 3
        and isinstance(top_layer, layers.ServerTLSLayer)
    ):
        raise NotImplementedError()

    # 4. Check for --tcp
    # This must consult tcp_hosts (it used to iterate allow_hosts), and
    # re.search() needs strings rather than raw address objects.
    tcp_candidates: typing.List[str] = []
    address = context.server.address
    if address:
        # NOTE(review): assumes a non-str address is a (host, port) pair - confirm.
        if not isinstance(address, str):
            address = f"{address[0]}:{address[1]}"
        tcp_candidates.append(address)
    sni = context.client.sni
    if sni:
        # NOTE(review): the SNI may be stored as bytes; decode it like
        # ignore_connection does - confirm against the context definition.
        if isinstance(sni, bytes):
            sni = sni.decode("idna")
        tcp_candidates.append(sni)
    if any(
        re.search(rex, candidate, re.IGNORECASE)
        for candidate in tcp_candidates
        for rex in ctx.options.tcp_hosts
    ):
        return layers.TCPLayer(context)

    # 5. Check for raw tcp mode.
    # A negotiated ALPN protocol that is not an HTTP one rules out HTTP.
    # (This used to compare the SNI hostname against the ALPN list.)
    alpn_indicates_non_http = (
        context.client.alpn and context.client.alpn not in HTTP_ALPNS
    )
    # Very simple heuristic here - the first three bytes should be
    # the HTTP verb, so A-Za-z is expected.
    probably_no_http = not data_client[:3].isalpha()
    if ctx.options.rawtcp and (alpn_indicates_non_http or probably_no_http):
        return layers.TCPLayer(context)

    # 6. Assume HTTP by default.
    return layers.HTTPLayer(context, HTTPMode.transparent)
def make_top_layer(self, context): def make_top_layer(self, context: context.Context) -> layer.Layer:
if ctx.options.mode == "regular": if ctx.options.mode == "regular":
return layers.modes.HttpProxy(context) return layers.modes.HttpProxy(context)
elif ctx.options.mode == "transparent": elif ctx.options.mode == "transparent":