mitmproxy/libmproxy/protocol.py

363 lines
14 KiB
Python
Raw Normal View History

2014-01-13 01:25:58 +00:00
import libmproxy.utils, libmproxy.flow
from netlib import http, http_status, tcp
import netlib.utils
2014-01-09 04:34:29 +00:00
from netlib.odict import ODictCaseless
2014-01-18 14:35:37 +00:00
import select
from proxy import ProxyError, KILL
2014-01-05 00:03:55 +00:00
2014-01-09 04:34:29 +00:00
# Compatibility switch read by HTTPHandler: when True, the channel is asked
# with the old message names ("request", "response", "error") and is handed
# the bare request/response/error object; when False, the "http"-prefixed
# names are used and the whole flow object is passed instead.
LEGACY = True
2014-01-07 01:29:10 +00:00
2014-01-09 04:34:29 +00:00
def _handle(msg, conntype, connection_handler, *args, **kwargs):
2014-01-05 00:03:55 +00:00
handler = None
if conntype == "http":
handler = HTTPHandler(connection_handler)
else:
raise NotImplementedError
2014-01-09 04:34:29 +00:00
f = getattr(handler, "handle_" + msg)
return f(*args, **kwargs)
def handle_messages(conntype, connection_handler):
    """
    Run the "messages" handler of the protocol selected by conntype on the
    given connection handler. Nothing is returned.
    """
    _handle("messages", conntype, connection_handler)
2014-01-07 01:29:10 +00:00
class ConnectionTypeChange(Exception):
    """
    Raised by HTTPHandler.ssl_upgrade() after the connection type has been
    re-determined and SSL has been established.
    """
2014-01-05 00:03:55 +00:00
class ProtocolHandler(object):
    """
    Base class for protocol handlers. It only stores the connection handler
    it was created for, as the attribute `c`.
    """

    def __init__(self, c):
        # c: the connection handler this protocol handler operates on.
        self.c = c
2014-01-07 01:29:10 +00:00
2014-01-13 01:25:58 +00:00
"""
Minimalistic cleanroom reimplementation of a couple of flow.* classes. Most functionality is missing,
but they demonstrate what needs to be added/changed to/within the existing classes.
"""
2014-01-18 14:35:37 +00:00
2014-01-09 04:34:29 +00:00
class Flow(object):
    """
    Plain record describing one exchange: the connection type, the client
    and server connections involved and the error (if any) attached to it.
    """

    def __init__(self, conntype, client_conn, server_conn, error):
        self.conntype = conntype
        self.client_conn = client_conn
        self.server_conn = server_conn
        self.error = error
2014-01-09 04:34:29 +00:00
class HTTPFlow(Flow):
    """
    Flow with the connection type fixed to "http" that additionally carries
    the request and the response of the exchange.
    """

    def __init__(self, client_conn, server_conn, error, request, response):
        super(HTTPFlow, self).__init__("http", client_conn, server_conn, error)
        self.request = request
        self.response = response
2014-01-13 01:25:58 +00:00
class HttpAuthenticationError(Exception):
    """
    Signals that a request failed authentication.

    auth_headers holds the headers passed in by the raiser (None by default);
    HTTPHandler.handle_flow forwards them to the error response it sends.
    """

    def __init__(self, auth_headers=None):
        self.auth_headers = auth_headers

    def __str__(self):
        # Fixed text; the headers are deliberately not part of the message.
        return "HttpAuthenticationError"
class HTTPMessage(object):
    """
    Common base of HTTPRequest and HTTPResponse. The methods here rely on the
    `headers` and `content` attributes that the subclasses set.
    """

    def _assemble_headers(self):
        """
        Return the header block as a string, built from a copy of
        self.headers so that the message itself is left unmodified.
        """
        assembled = self.headers.copy()
        # These two headers are never passed on.
        dropped = ["proxy-connection", "transfer-encoding"]
        libmproxy.utils.del_all(assembled, dropped)
        if self.content:
            assembled["Content-Length"] = [str(len(self.content))]
        elif 'Transfer-Encoding' in self.headers:
            # content-length for e.g. chunked transfer-encoding with no content
            assembled["Content-Length"] = ["0"]
        return str(assembled)
2014-01-13 01:25:58 +00:00
class HTTPResponse(HTTPMessage):
    """
    An HTTP response: status line fields, headers, body and the timestamps
    taken while it was read.
    """

    def __init__(self, httpversion, code, msg, headers, content, timestamp_start, timestamp_end):
        """
        httpversion is indexed as (major, minor) when the status line is
        assembled; headers must be an ODictCaseless instance.
        """
        self.httpversion = httpversion
        self.code = code
        self.msg = msg
        self.headers = headers
        self.content = content
        self.timestamp_start = timestamp_start
        self.timestamp_end = timestamp_end
        assert isinstance(headers, ODictCaseless)

    #FIXME: Compatibility Fix
    @property
    def request(self):
        return False

    def _assemble_response_line(self):
        """
        Return the status line without the trailing line break.
        """
        return 'HTTP/%s.%s %s %s' % (self.httpversion[0], self.httpversion[1], self.code, self.msg)

    def _assemble(self):
        """
        Return the complete serialized response: status line, headers, body.
        """
        response_line = self._assemble_response_line()
        # _assemble_headers() treats a falsy body as empty; do the same here
        # so that a missing body is not written out as the text "None".
        return '%s\r\n%s\r\n%s' % (response_line, self._assemble_headers(), self.content or "")

    @classmethod
    def from_stream(cls, rfile, request_method, include_content=True, body_size_limit=None):
        """
        Parse an HTTP response from a file stream

        Raises NotImplementedError if include_content is false; reading a
        response without its body is not supported.
        """
        if not include_content:
            raise NotImplementedError

        httpversion, code, msg, headers, content = http.read_response(
            rfile,
            request_method,
            body_size_limit)
        timestamp_start = rfile.first_byte_timestamp
        timestamp_end = libmproxy.utils.timestamp()
        # Build via cls so that subclasses get instances of their own type.
        return cls(httpversion, code, msg, headers, content, timestamp_start, timestamp_end)
2014-01-09 04:34:29 +00:00
class HTTPRequest(HTTPMessage):
    """
    An HTTP request.

    form_in records the request-line form the request arrived in
    ("asterisk", "origin", "authority" or "absolute"); form_out is the form
    used when the request is serialized again and defaults to form_in.
    """

    def __init__(self, form_in, method, scheme, host, port, path, httpversion, headers, content,
                 timestamp_start, timestamp_end, form_out=None, ip=None):
        """
        httpversion is indexed as (major, minor) when the request line is
        assembled; headers must be an ODictCaseless instance.
        """
        self.form_in = form_in
        self.method = method
        self.scheme = scheme
        self.host = host
        self.port = port
        self.path = path
        self.httpversion = httpversion
        self.headers = headers
        self.content = content
        self.timestamp_start = timestamp_start
        self.timestamp_end = timestamp_end
        self.form_out = form_out or form_in
        self.ip = ip  # resolved ip address
        assert isinstance(headers, ODictCaseless)

    #FIXME: Compatibility Fix
    def is_live(self):
        return True

    def _assemble_request_line(self, form):
        """
        Return the request line (without line break) in the given form.

        Raises http.HttpError(400) for an unknown form.
        """
        if form == "asterisk" or form == "origin":
            return '%s %s HTTP/%s.%s' % (self.method, self.path,
                                         self.httpversion[0], self.httpversion[1])
        if form == "authority":
            return '%s %s:%s HTTP/%s.%s' % (self.method, self.host, self.port,
                                            self.httpversion[0], self.httpversion[1])
        if form == "absolute":
            return '%s %s://%s:%s%s HTTP/%s.%s' % \
                   (self.method, self.scheme, self.host, self.port, self.path,
                    self.httpversion[0], self.httpversion[1])
        raise http.HttpError(400, "Invalid request form")

    def _assemble(self):
        """
        Return the complete serialized request, using form_out for the
        request line.
        """
        request_line = self._assemble_request_line(self.form_out)
        # content is None when the request was read with
        # include_content=False; emit an empty body instead of the text
        # "None" (matching how _assemble_headers() treats falsy content).
        return '%s\r\n%s\r\n%s' % (request_line, self._assemble_headers(), self.content or "")

    @classmethod
    def from_stream(cls, rfile, include_content=True, body_size_limit=None):
        """
        Parse an HTTP request from a file stream

        Raises http.HttpError(400) if the request line or the headers cannot
        be parsed. If include_content is false the body is not read and both
        content and timestamp_end stay None.
        """
        # Not every request form supplies these, and the body is optional.
        scheme = host = port = content = timestamp_end = None

        request_line = HTTPHandler.get_line(rfile)
        timestamp_start = rfile.first_byte_timestamp

        request_line_parts = http.parse_init(request_line)
        if not request_line_parts:
            raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
        method, path, httpversion = request_line_parts

        if path == '*':
            form_in = "asterisk"
        elif path.startswith("/"):
            form_in = "origin"
            if not netlib.utils.isascii(path):
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
        elif method.upper() == 'CONNECT':
            form_in = "authority"
            r = http.parse_init_connect(request_line)
            if not r:
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
            host, port, _ = r
        else:
            form_in = "absolute"
            r = http.parse_init_proxy(request_line)
            if not r:
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
            _, scheme, host, port, path, _ = r

        headers = http.read_headers(rfile)
        if headers is None:
            raise http.HttpError(400, "Invalid headers")

        if include_content:
            content = http.read_http_body(rfile, headers, body_size_limit, True)
            timestamp_end = libmproxy.utils.timestamp()

        # Build via cls so that subclasses get instances of their own type.
        return cls(form_in, method, scheme, host, port, path, httpversion, headers, content,
                   timestamp_start, timestamp_end)
2014-01-05 00:03:55 +00:00
class HTTPHandler(ProtocolHandler):
    """
    Protocol handler for HTTP. Reads requests from the client connection of
    the connection handler (self.c), obtains a response and writes it back.
    """

    def handle_messages(self):
        """
        Process flows until handle_flow() reports that the connection should
        not be reused, then flag the connection handler for closing.
        """
        while self.handle_flow():
            pass
        self.c.close = True

    def get_response_from_server(self, request):
        """
        Send the request over the server connection and return the parsed
        HTTPResponse.

        On a disconnect the server connection is re-established and the
        request is sent once more; a second failure is re-raised.
        """
        request_raw = request._assemble()
        for attempt in range(2):
            try:
                self.c.server_conn.wfile.write(request_raw)
                self.c.server_conn.wfile.flush()
                return HTTPResponse.from_stream(self.c.server_conn.rfile, request.method,
                                                body_size_limit=self.c.config.body_size_limit)
            except (tcp.NetLibDisconnect, http.HttpErrorConnClosed) as v:
                self.c.log("error in server communication: %s" % str(v))
                if attempt < 1:
                    # In any case, we try to reconnect at least once.
                    # This is necessary because it might be possible that we already initiated an upstream connection
                    # after clientconnect that has already been expired, e.g consider the following event log:
                    # > clientconnect (transparent mode destination known)
                    # > serverconnect
                    # > read n% of large request
                    # > server detects timeout, disconnects
                    # > read (100-n)% of large request
                    # > send large request upstream
                    self.c.server_reconnect()
                else:
                    # Bare raise keeps the original traceback.
                    raise

    def handle_flow(self):
        """
        Handle a single request/response exchange.

        Returns True if another flow may follow on the same connection and
        False otherwise (kill reply, "Connection: close", or an error that
        was reported via process_error()). ConnectionTypeChange raised by
        ssl_upgrade() is not caught here.
        """
        flow = HTTPFlow(self.c.client_conn, self.c.server_conn, None, None, None)
        try:
            flow.request = HTTPRequest.from_stream(self.c.client_conn.rfile,
                                                   body_size_limit=self.c.config.body_size_limit)
            self.c.log("request", [flow.request._assemble_request_line(flow.request.form_in)])
            self.process_request(flow.request)

            request_reply = self.c.channel.ask("request" if LEGACY else "httprequest",
                                               flow.request if LEGACY else flow)
            if request_reply is None or request_reply == KILL:
                return False

            # The reply may already be a complete response; only otherwise
            # is the server contacted.
            if isinstance(request_reply, HTTPResponse):
                flow.response = request_reply
            else:
                flow.response = self.get_response_from_server(flow.request)

            self.c.log("response", [flow.response._assemble_response_line()])
            response_reply = self.c.channel.ask("response" if LEGACY else "httpresponse",
                                                flow.response if LEGACY else flow)
            if response_reply is None or response_reply == KILL:
                return False

            raw = flow.response._assemble()
            self.c.client_conn.wfile.write(raw)
            self.c.client_conn.wfile.flush()
            flow.timestamp_end = libmproxy.utils.timestamp()

            if (http.connection_close(flow.request.httpversion, flow.request.headers) or
                    http.connection_close(flow.response.httpversion, flow.response.headers)):
                return False

            if flow.request.form_in == "authority":
                self.ssl_upgrade()
            return True
        except HttpAuthenticationError as e:
            self.process_error(flow, code=407, message="Proxy Authentication Required", headers=e.auth_headers)
        except (http.HttpError, ProxyError) as e:
            self.process_error(flow, code=e.code, message=e.msg)
        except tcp.NetLibError as e:
            self.process_error(flow, message=e.message or e.__class__)
        return False

    def process_error(self, flow, code=None, message=None, headers=None):
        """
        Record an error on the flow, log it, announce it on the channel and,
        if a status code is given, send an error response to the client.

        Reporting is best-effort: failures while doing so are suppressed.
        """
        try:
            err = ("%s: %s" % (code, message)) if code else message
            flow.error = libmproxy.flow.Error(False, err)
            self.c.log("error: %s" % err)
            self.c.channel.ask("error" if LEGACY else "httperror",
                               flow.error if LEGACY else flow)
            if code:
                self.send_error(code, message, headers)
        except Exception:
            # Deliberately ignored (e.g. the client may already be gone), but
            # KeyboardInterrupt/SystemExit are no longer swallowed as they
            # were by the former bare except.
            pass

    def send_error(self, code, message, headers):
        """
        Write a minimal HTML error response with the given status code to the
        client connection. headers, if given, is iterated via items() and
        each pair is added as an extra response header.
        """
        response = http_status.RESPONSES.get(code, "Unknown")
        # NOTE(review): message is inserted into the HTML unescaped and may
        # contain client-supplied text (e.g. the repr of a bad request line);
        # consider escaping it.
        html_content = '<html><head>\n<title>%d %s</title>\n</head>\n<body>\n%s\n</body>\n</html>' % \
                       (code, response, message)
        self.c.client_conn.wfile.write("HTTP/1.1 %s %s\r\n" % (code, response))
        self.c.client_conn.wfile.write("Server: %s\r\n" % self.c.server_version)
        self.c.client_conn.wfile.write("Content-type: text/html\r\n")
        self.c.client_conn.wfile.write("Content-Length: %d\r\n" % len(html_content))
        if headers:
            for key, value in headers.items():
                self.c.client_conn.wfile.write("%s: %s\r\n" % (key, value))
        self.c.client_conn.wfile.write("Connection: close\r\n")
        self.c.client_conn.wfile.write("\r\n")
        self.c.client_conn.wfile.write(html_content)
        self.c.client_conn.wfile.flush()

    def ssl_upgrade(self):
        """
        Switch the connection handler to transparent mode, let it
        re-determine the connection type and establish SSL on both sides.

        Always raises ConnectionTypeChange.
        """
        self.c.mode = "transparent"
        self.c.determine_conntype()
        self.c.establish_ssl(server=True, client=True)
        raise ConnectionTypeChange

    def process_request(self, request):
        """
        Validate the request against the current proxy mode and set up the
        server connection it needs.

        Raises HttpAuthenticationError (via authenticate()), http.HttpError
        for requests that are not acceptable, and ConnectionTypeChange when a
        CONNECT request is answered directly.
        """
        if self.c.mode == "regular":
            self.authenticate(request)
        if request.form_in == "authority" and self.c.client_conn.ssl_established:
            raise http.HttpError(502, "Must not CONNECT on already encrypted connection")

        # If we have a CONNECT request, we might need to intercept
        if request.form_in == "authority":
            directly_addressed_at_mitmproxy = (self.c.mode == "regular") and not self.c.config.forward_proxy
            if directly_addressed_at_mitmproxy:
                self.c.establish_server_connection(request.host, request.port)
                self.c.client_conn.wfile.write(
                    'HTTP/1.1 200 Connection established\r\n' +
                    ('Proxy-agent: %s\r\n' % self.c.server_version) +
                    '\r\n'
                )
                self.c.client_conn.wfile.flush()
                self.ssl_upgrade()  # raises ConnectionTypeChange exception

        if self.c.mode == "regular":
            if request.form_in == "authority":
                pass
            elif request.form_in == "absolute":
                if not self.c.config.forward_proxy:
                    request.form_out = "origin"
                    if ((not self.c.server_conn) or
                            (self.c.server_conn.address != (request.host, request.port))):
                        self.c.establish_server_connection(request.host, request.port)
            else:
                raise http.HttpError(400, "Invalid Request")

    def authenticate(self, request):
        """
        Check the request headers with the configured authenticator, if any.

        On success the authenticator's clean() is applied to the headers;
        on failure HttpAuthenticationError is raised carrying the
        authenticator's challenge headers. Returns request.headers.
        """
        if self.c.config.authenticator:
            if self.c.config.authenticator.authenticate(request.headers):
                self.c.config.authenticator.clean(request.headers)
            else:
                raise HttpAuthenticationError(self.c.config.authenticator.auth_challenge_headers())
        return request.headers

    @staticmethod
    def get_line(fp):
        """
        Get a line, possibly preceded by a blank.

        Raises tcp.NetLibDisconnect if the stream yields an empty string.
        """
        line = fp.readline()
        if line == "\r\n" or line == "\n":  # Possible leftover from previous message
            line = fp.readline()
        if line == "":
            raise tcp.NetLibDisconnect
        return line