merge flow classes. current status: basic mitmdump working

This commit is contained in:
Maximilian Hils 2014-01-30 05:00:13 +01:00
parent 607f777811
commit 40bf42f14a
6 changed files with 1117 additions and 485 deletions

View File

@ -189,7 +189,7 @@ def format_flow(f, focus, extended=False, hostheader=False, padding=2):
contentdesc = "[no content]"
delta = f.response.timestamp_end - f.response.timestamp_start
size = len(f.response.content) + f.response.get_header_size()
size = f.response.size()
rate = utils.pretty_size(size / ( delta if delta > 0 else 1 ) )
d.update(dict(

View File

@ -143,459 +143,68 @@ class SetHeaders:
f.request.headers.add(header, value)
class decoded(object):
"""
A context manager that decodes a request, response or error, and then
re-encodes it with the same encoding after execution of the block.
Example:
with decoded(request):
request.content = request.content.replace("foo", "bar")
"""
def __init__(self, o):
self.o = o
ce = o.headers.get_first("content-encoding")
if ce in encoding.ENCODINGS:
self.ce = ce
else:
self.ce = None
def __enter__(self):
if self.ce:
self.o.decode()
def __exit__(self, type, value, tb):
if self.ce:
self.o.encode(self.ce)
class StateObject:
def _get_state(self):
raise NotImplementedError
def _load_state(self, state):
raise NotImplementedError
@classmethod
def _from_state(cls, state):
raise NotImplementedError
def __eq__(self, other):
try:
return self._get_state() == other._get_state()
except AttributeError:
except AttributeError: # we may compare with something that's not a StateObject
return False
class HTTPMsg(StateObject):
def get_decoded_content(self):
"""
Returns the decoded content based on the current Content-Encoding header.
Doesn't change the message iteself or its headers.
"""
ce = self.headers.get_first("content-encoding")
if not self.content or ce not in encoding.ENCODINGS:
return self.content
return encoding.decode(ce, self.content)
def decode(self):
"""
Decodes content based on the current Content-Encoding header, then
removes the header. If there is no Content-Encoding header, no
action is taken.
Returns True if decoding succeeded, False otherwise.
"""
ce = self.headers.get_first("content-encoding")
if not self.content or ce not in encoding.ENCODINGS:
return False
data = encoding.decode(
ce,
self.content
)
if data is None:
return False
self.content = data
del self.headers["content-encoding"]
return True
def encode(self, e):
"""
Encodes content with the encoding e, where e is "gzip", "deflate"
or "identity".
"""
# FIXME: Error if there's an existing encoding header?
self.content = encoding.encode(e, self.content)
self.headers["content-encoding"] = [e]
def size(self, **kwargs):
"""
Size in bytes of a fully rendered message, including headers and
HTTP lead-in.
"""
hl = len(self._assemble_head(**kwargs))
if self.content:
return hl + len(self.content)
else:
return hl
def get_content_type(self):
return self.headers.get_first("content-type")
def get_transmitted_size(self):
# FIXME: this is inprecise in case chunking is used
# (we should count the chunking headers)
if not self.content:
return 0
return len(self.content)
class Request(HTTPMsg):
class SimpleStateObject(StateObject):
"""
An HTTP request.
Exposes the following attributes:
client_conn: ClientConnect object, or None if this is a replay.
headers: ODictCaseless object
content: Content of the request, None, or CONTENT_MISSING if there
is content associated, but not present. CONTENT_MISSING evaluates
to False to make checking for the presence of content natural.
scheme: URL scheme (http/https)
host: Host portion of the URL
port: Destination port
path: Path portion of the URL
timestamp_start: Seconds since the epoch signifying request transmission started
method: HTTP method
timestamp_end: Seconds since the epoch signifying request transmission ended
tcp_setup_timestamp: Seconds since the epoch signifying remote TCP connection setup completion time
(or None, if request didn't results TCP setup)
ssl_setup_timestamp: Seconds since the epoch signifying remote SSL encryption setup completion time
(or None, if request didn't results SSL setup)
A StateObject with opionated conventions that tries to keep everything DRY.y
"""
def __init__(
self, client_conn, httpversion, host, port,
scheme, method, path, headers, content, timestamp_start=None,
timestamp_end=None, tcp_setup_timestamp=None,
ssl_setup_timestamp=None, ip=None):
assert isinstance(headers, ODictCaseless)
self.client_conn = client_conn
self.httpversion = httpversion
self.host, self.port, self.scheme = host, port, scheme
self.method, self.path, self.headers, self.content = method, path, headers, content
self.timestamp_start = timestamp_start or utils.timestamp()
self.timestamp_end = max(timestamp_end or utils.timestamp(), timestamp_start)
self.close = False
self.tcp_setup_timestamp = tcp_setup_timestamp
self.ssl_setup_timestamp = ssl_setup_timestamp
self.ip = ip
# Have this request's cookies been modified by sticky cookies or auth?
self.stickycookie = False
self.stickyauth = False
# Live attributes - not serialized
self.wfile, self.rfile = None, None
def set_live(self, rfile, wfile):
self.wfile, self.rfile = wfile, rfile
def is_live(self):
return bool(self.wfile)
def anticache(self):
"""
Modifies this request to remove headers that might produce a cached
response. That is, we remove ETags and If-Modified-Since headers.
"""
delheaders = [
"if-modified-since",
"if-none-match",
]
for i in delheaders:
del self.headers[i]
def anticomp(self):
"""
Modifies this request to remove headers that will compress the
resource's data.
"""
self.headers["accept-encoding"] = ["identity"]
def constrain_encoding(self):
"""
Limits the permissible Accept-Encoding values, based on what we can
decode appropriately.
"""
if self.headers["accept-encoding"]:
self.headers["accept-encoding"] = [', '.join(
e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
)]
def _set_replay(self):
self.client_conn = None
def is_replay(self):
"""
Is this request a replay?
"""
if self.client_conn:
return False
else:
return True
def _load_state(self, state):
if state["client_conn"]:
if self.client_conn:
self.client_conn._load_state(state["client_conn"])
else:
self.client_conn = ClientConnect._from_state(state["client_conn"])
else:
self.client_conn = None
self.host = state["host"]
self.port = state["port"]
self.scheme = state["scheme"]
self.method = state["method"]
self.path = state["path"]
self.headers = ODictCaseless._from_state(state["headers"])
self.content = state["content"]
self.timestamp_start = state["timestamp_start"]
self.timestamp_end = state["timestamp_end"]
self.tcp_setup_timestamp = state["tcp_setup_timestamp"]
self.ssl_setup_timestamp = state["ssl_setup_timestamp"]
self.ip = state["ip"]
_stateobject_attributes = None
"""
A dict where the keys represent the attributes to be serialized.
The values represent the attribute class or type.
If the attribute is a class, this class must be a subclass of StateObject.
"""
def _get_state(self):
return dict(
client_conn = self.client_conn._get_state() if self.client_conn else None,
httpversion = self.httpversion,
host = self.host,
port = self.port,
scheme = self.scheme,
method = self.method,
path = self.path,
headers = self.headers._get_state(),
content = self.content,
timestamp_start = self.timestamp_start,
timestamp_end = self.timestamp_end,
tcp_setup_timestamp = self.tcp_setup_timestamp,
ssl_setup_timestamp = self.ssl_setup_timestamp,
ip = self.ip
)
return {attr: (getattr(self, attr)._get_state()
if (type(cls) == 'classobj')
else getattr(self, attr))
for attr, cls in self._stateobject_attributes.iteritems()}
def _load_state(self, state):
# Restore every attribute listed in _stateobject_attributes from the given
# state dict, delegating the per-attribute work to _load_state_attr.
for attr, cls in self._stateobject_attributes.iteritems():
self._load_state_attr(attr, cls, state)
def _load_state_attr(self, attribute, cls, state):
if state[attribute] is not None:
if type(cls) == 'classobj':
assert issubclass(cls, StateObject)
curr = getattr(self, attribute)
if curr:
curr._load_state(state[attribute])
else:
setattr(self, attribute, cls._from_state(state[attribute]))
else:
setattr(self, attribute, cls(state[attribute]))
else:
setattr(self, attribute, None)
@classmethod
def _from_state(klass, state):
return klass(
ClientConnect._from_state(state["client_conn"]),
tuple(state["httpversion"]),
str(state["host"]),
state["port"],
str(state["scheme"]),
str(state["method"]),
str(state["path"]),
ODictCaseless._from_state(state["headers"]),
state["content"],
state["timestamp_start"],
state["timestamp_end"],
state["tcp_setup_timestamp"],
state["ssl_setup_timestamp"],
state["ip"]
)
def __hash__(self):
return id(self)
def copy(self):
c = copy.copy(self)
c.headers = self.headers.copy()
return c
def get_form_urlencoded(self):
"""
Retrieves the URL-encoded form data, returning an ODict object.
Returns an empty ODict if there is no data or the content-type
indicates non-form data.
"""
if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
return ODict(utils.urldecode(self.content))
return ODict([])
def set_form_urlencoded(self, odict):
"""
Sets the body to the URL-encoded form data, and adds the
appropriate content-type header. Note that this will destory the
existing body if there is one.
"""
# FIXME: If there's an existing content-type header indicating a
# url-encoded form, leave it alone.
self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
self.content = utils.urlencode(odict.lst)
def get_path_components(self):
"""
Returns the path components of the URL as a list of strings.
Components are unquoted.
"""
_, _, path, _, _, _ = urlparse.urlparse(self.get_url())
return [urllib.unquote(i) for i in path.split("/") if i]
def set_path_components(self, lst):
"""
Takes a list of strings, and sets the path component of the URL.
Components are quoted.
"""
lst = [urllib.quote(i, safe="") for i in lst]
path = "/" + "/".join(lst)
scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url())
self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
def get_query(self):
"""
Gets the request query string. Returns an ODict object.
"""
_, _, _, _, query, _ = urlparse.urlparse(self.get_url())
if query:
return ODict(utils.urldecode(query))
return ODict([])
def set_query(self, odict):
"""
Takes an ODict object, and sets the request query string.
"""
scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url())
query = utils.urlencode(odict.lst)
self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))
def get_url(self, hostheader=False):
"""
Returns a URL string, constructed from the Request's URL compnents.
If hostheader is True, we use the value specified in the request
Host header to construct the URL.
"""
if hostheader:
host = self.headers.get_first("host") or self.host
else:
host = self.host
host = host.encode("idna")
return utils.unparse_url(self.scheme, host, self.port, self.path).encode('ascii')
def set_url(self, url):
"""
Parses a URL specification, and updates the Request's information
accordingly.
Returns False if the URL was invalid, True if the request succeeded.
"""
parts = http.parse_url(url)
if not parts:
return False
self.scheme, self.host, self.port, self.path = parts
return True
def get_cookies(self):
cookie_headers = self.headers.get("cookie")
if not cookie_headers:
return None
cookies = []
for header in cookie_headers:
pairs = [pair.partition("=") for pair in header.split(';')]
cookies.extend((pair[0],(pair[2],{})) for pair in pairs)
return dict(cookies)
def get_header_size(self):
FMT = '%s %s HTTP/%s.%s\r\n%s\r\n'
assembled_header = FMT % (
self.method,
self.path,
self.httpversion[0],
self.httpversion[1],
str(self.headers)
)
return len(assembled_header)
def _assemble_head(self, proxy=False):
FMT = '%s %s HTTP/%s.%s\r\n%s\r\n'
FMT_PROXY = '%s %s://%s:%s%s HTTP/%s.%s\r\n%s\r\n'
headers = self.headers.copy()
utils.del_all(
headers,
[
'proxy-connection',
'keep-alive',
'connection',
'transfer-encoding'
]
)
if not 'host' in headers:
headers["host"] = [utils.hostport(self.scheme, self.host, self.port)]
content = self.content
if content:
headers["Content-Length"] = [str(len(content))]
else:
content = ""
if self.close:
headers["connection"] = ["close"]
if not proxy:
return FMT % (
self.method,
self.path,
self.httpversion[0],
self.httpversion[1],
str(headers)
)
else:
return FMT_PROXY % (
self.method,
self.scheme,
self.host,
self.port,
self.path,
self.httpversion[0],
self.httpversion[1],
str(headers)
)
def _assemble(self, _proxy = False):
"""
Assembles the request for transmission to the server. We make some
modifications to make sure interception works properly.
Returns None if the request cannot be assembled.
"""
if self.content == CONTENT_MISSING:
return None
head = self._assemble_head(_proxy)
if self.content:
return head + self.content
else:
return head
def replace(self, pattern, repl, *args, **kwargs):
"""
Replaces a regular expression pattern with repl in both the headers
and the body of the request. Encoded content will be decoded before
replacement, and re-encoded afterwards.
Returns the number of replacements made.
"""
with decoded(self):
self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
c += pc
c += self.headers.replace(pattern, repl, *args, **kwargs)
return c
def _from_state(cls, state):
f = cls()
f._load_state(state)
return f
class Response(HTTPMsg):
class Response(object):
"""
An HTTP response.
@ -1269,7 +878,7 @@ class State(object):
"""
Add a response to the state. Returns the matching flow.
"""
f = self._flow_map.get(resp.request)
f = self._flow_map.get(resp.flow)
if not f:
return False
f.response = resp
@ -1596,7 +1205,7 @@ class FlowMaster(controller.Master):
return f
def handle_request(self, r):
if r.is_live():
if False and r.is_live(): # FIXME
app = self.apps.get(r)
if app:
# FIXME: for the tcp proxy, use flow.client_conn.wfile

View File

@ -0,0 +1,46 @@
KILL = 0  # Constant used to mark killed requests.
class ConnectionTypeChange(Exception):
    """
    Raised when the connection type has been changed, e.g. after an
    "HTTP/1.1 101 Switching Protocols" response.

    The ProtocolHandler that raises this exception is responsible for
    specifying the new conntype before raising it.
    """
class ProtocolHandler(object):
    """
    Base class for protocol handlers. Subclasses implement handle_messages
    and handle_error; the object passed to the constructor is kept as
    self.c.
    """

    def __init__(self, c):
        self.c = c

    def handle_messages(self):
        """
        Called once a client connection has been made. Depending on the
        proxy settings, a server connection might already exist as well.
        """
        raise NotImplementedError()

    def handle_error(self, error):
        """
        Called if there is an uncaught exception during the connection.
        This may happen outside of handle_messages, e.g. if the initial SSL
        handshake fails in transparent mode.
        """
        raise NotImplementedError()
from .http import HTTPHandler
def _handler(conntype, connection_handler):
if conntype == "http":
return HTTPHandler(connection_handler)
raise NotImplementedError
def handle_messages(conntype, connection_handler):
    """
    Create the handler for conntype and return the result of its
    handle_messages() call.
    """
    handler = _handler(conntype, connection_handler)
    return handler.handle_messages()
def handle_error(conntype, connection_handler, error):
    """
    Create the handler for conntype and return the result of passing
    error to its handle_error() method.
    """
    handler = _handler(conntype, connection_handler)
    return handler.handle_error(error)

999
libmproxy/protocol/http.py Normal file
View File

@ -0,0 +1,999 @@
import Cookie
from email.utils import parsedate_tz, formatdate, mktime_tz
import urllib
import urlparse
import time
import copy
from ..flow import SimpleStateObject
from netlib import http, tcp, http_status
from netlib.odict import ODict, ODictCaseless
import netlib.utils
from .. import encoding, utils, version
from ..proxy import ProxyError, ServerConnection, ClientConnection
from . import ProtocolHandler, ConnectionTypeChange, KILL
import libmproxy.flow
# Content-Type value that identifies URL-encoded form bodies.
HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
# Marker for content that is associated with a message but not present.
# It is falsy, so "if content:" checks treat it like absent content.
CONTENT_MISSING = 0
# NOTE(review): the purpose of this flag is not visible in this part of the
# file -- presumably a compatibility switch; confirm before relying on it.
LEGACY = True
def get_line(fp):
    """
    Read a line from fp, skipping one leading blank line if present.

    Returns the line as read. Raises tcp.NetLibDisconnect if the stream
    yields an empty string.
    """
    current = fp.readline()
    if current in ("\r\n", "\n"):
        # Possible leftover from previous message
        current = fp.readline()
    if current == "":
        raise tcp.NetLibDisconnect
    return current
class decoded(object):
    """
    A context manager that decodes a request or response, and then
    re-encodes it with the same encoding after execution of the block.

    The message is only re-encoded if it was actually decoded on entry: if
    its Content-Encoding is not one of encoding.ENCODINGS, or if decode()
    reports failure, the message is left untouched on exit.

    Example:

        with decoded(request):
            request.content = request.content.replace("foo", "bar")
    """
    def __init__(self, o):
        self.o = o
        ce = o.headers.get_first("content-encoding")
        if ce in encoding.ENCODINGS:
            self.ce = ce
        else:
            self.ce = None

    def __enter__(self):
        if self.ce:
            # decode() returns False when nothing was decoded. Forget the
            # encoding in that case so that __exit__ does not encode content
            # that is still in its original (encoded) form a second time.
            if not self.o.decode():
                self.ce = None

    def __exit__(self, type, value, tb):
        if self.ce:
            self.o.encode(self.ce)
class BackreferenceMixin(object):
    """
    Whenever an attribute named in the _backrefattr tuple is assigned a
    value other than None, a reference back to this object is stored on the
    assigned value under the name given by _backrefname.

    Example:

        e = Error()
        f = Flow()
        f.error = e
        assert f is e.flow
    """
    _backrefattr = ()

    def __setattr__(self, key, value):
        super(BackreferenceMixin, self).__setattr__(key, value)
        if key not in self._backrefattr or value is None:
            return
        # The value must not already carry a backref to a different object.
        owner = getattr(value, self._backrefname, self)
        assert (owner or self) is self
        setattr(value, self._backrefname, self)
class Error(SimpleStateObject):
    """
    An Error.

    This is distinct from an HTTP error response (say, a code 500), which
    is represented by a normal Response object. This class is responsible
    for indicating errors that fall outside of normal HTTP communications,
    like interrupted connections, timeouts, protocol errors.

    Exposes the following attributes:

        flow: Flow object
        msg: Message describing the error
        timestamp: Seconds since the epoch
    """
    def __init__(self, msg, timestamp=None):
        self.msg = msg
        self.timestamp = timestamp or utils.timestamp()

    # Attributes that make up the serialized state, with the type used to
    # restore each of them.
    _stateobject_attributes = dict(
        msg=str,
        timestamp=float
    )

    @classmethod
    def _from_state(cls, state):
        """
        Build an Error from a serialized state dict.
        """
        # The inherited implementation constructs cls() without arguments,
        # which raises TypeError here because msg is required. Use a
        # placeholder message; _load_state overwrites it.
        f = cls(None)
        f._load_state(state)
        return f

    def copy(self):
        """
        Return a shallow copy of this error.
        """
        c = copy.copy(self)
        return c
class Flow(SimpleStateObject, BackreferenceMixin):
    """
    Base class for flows: a connection type, the client and server
    connections, and an optional Error. Assigning the error attribute sets
    error.flow back to this flow (see _backrefattr / _backrefname).
    """
    _backrefattr = ("error",)
    _backrefname = "flow"
    # Attributes that make up the serialized state, with the class or type
    # used to restore each of them.
    _stateobject_attributes = dict(
        error=Error,
        client_conn=ClientConnection,
        server_conn=ServerConnection,
        conntype=str
    )

    def __init__(self, conntype, client_conn, server_conn, error):
        self.conntype = conntype
        self.client_conn = client_conn
        self.server_conn = server_conn
        self.error = error

    def _get_state(self):
        """
        Return the serialized state, extended with the current
        version.IVERSION under the "version" key.
        """
        d = super(Flow, self)._get_state()
        d.update(version=version.IVERSION)
        return d

    @classmethod
    def _from_state(cls, state):
        """
        Build a Flow from a serialized state dict.
        """
        f = cls(None, None, None, None)
        f._load_state(state)
        return f

    def copy(self):
        """
        Return a shallow copy of this flow. An existing error is copied as
        well, and the copy's back-reference points at the new flow.
        """
        f = copy.copy(self)
        if self.error:
            error = self.error.copy()
            # The copied error still refers back to the original flow.
            # Clear that reference first, otherwise the back-reference
            # consistency assertion fires when it is attached to the copy.
            setattr(error, self._backrefname, None)
            f.error = error
        return f
class HTTPMessage(SimpleStateObject):
    """
    Common base of HTTP requests and responses: content (de)coding, size,
    copy and replace helpers, plus the parsing/assembly hooks subclasses
    have to implement.
    """

    def __init__(self):
        self.flow = None  # Will usually set by backref mixin

    def get_decoded_content(self):
        """
        Return the content decoded according to the current
        Content-Encoding header, without modifying the message or its
        headers. The content is returned unchanged if it is empty or the
        encoding is not one of encoding.ENCODINGS.
        """
        ce = self.headers.get_first("content-encoding")
        if self.content and ce in encoding.ENCODINGS:
            return encoding.decode(ce, self.content)
        return self.content

    def decode(self):
        """
        Decode the content according to the current Content-Encoding
        header and remove that header. Nothing happens if there is no
        content or no supported Content-Encoding header.

        Returns True if decoding succeeded, False otherwise.
        """
        ce = self.headers.get_first("content-encoding")
        if not (self.content and ce in encoding.ENCODINGS):
            return False
        decoded_content = encoding.decode(ce, self.content)
        if decoded_content is None:
            return False
        self.content = decoded_content
        del self.headers["content-encoding"]
        return True

    def encode(self, e):
        """
        Encode the content with the encoding e, where e is "gzip",
        "deflate" or "identity", and set the Content-Encoding header
        accordingly.
        """
        # FIXME: Error if there's an existing encoding header?
        self.content = encoding.encode(e, self.content)
        self.headers["content-encoding"] = [e]

    def size(self, **kwargs):
        """
        Size in bytes of the fully rendered message, including headers and
        HTTP lead-in. Keyword arguments are forwarded to _assemble_head.
        """
        total = len(self._assemble_head(**kwargs))
        if self.content:
            total += len(self.content)
        return total

    def copy(self):
        """
        Return a shallow copy of the message with its own copy of the
        headers.
        """
        duplicate = copy.copy(self)
        duplicate.headers = self.headers.copy()
        return duplicate

    def replace(self, pattern, repl, *args, **kwargs):
        """
        Replace a regular expression pattern with repl in both the headers
        and the body of the message. Encoded content is decoded before the
        replacement and re-encoded afterwards.

        Returns the number of replacements made.
        """
        with decoded(self):
            self.content, count = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
        count += self.headers.replace(pattern, repl, *args, **kwargs)
        return count

    @classmethod
    def from_stream(cls, rfile, include_content=True, body_size_limit=None):
        """
        Parse an HTTP message from a file stream.
        """
        raise NotImplementedError

    def _assemble_first_line(self):
        """
        Return the assembled request/response line.
        """
        raise NotImplementedError

    def _assemble_headers(self):
        """
        Return the assembled headers.
        """
        raise NotImplementedError

    def _assemble_head(self):
        """
        Return the assembled request/response line plus headers.
        """
        raise NotImplementedError

    def _assemble(self):
        """
        Return the assembled request/response.
        """
        raise NotImplementedError
class HTTPRequest(HTTPMessage):
    """
    An HTTP request.

    Exposes the following attributes:

        flow: Flow object the request belongs to

        headers: ODictCaseless object

        content: Content of the request, None, or CONTENT_MISSING if there
        is content associated, but not present. CONTENT_MISSING evaluates
        to False to make checking for the presence of content natural.

        form_in: The request form which mitmproxy has received. The
        following values are possible:
            - "origin" (GET /index.html)
            - "absolute" (GET http://example.com:80/index.html)
            - "authority" (CONNECT example.com:443)
            - "asterisk" (OPTIONS *)
        Details: http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-25#section-5.3

        form_out: The request form which mitmproxy has sent out to the
        destination. Defaults to form_in.

        method: HTTP method

        scheme: URL scheme (http/https) (absolute-form only)

        host: Host portion of the URL (absolute-form and authority-form only)

        port: Destination port (absolute-form and authority-form only)

        path: Path portion of the URL (not present in authority-form)

        httpversion: HTTP version tuple

        timestamp_start: Timestamp indicating when request transmission started

        timestamp_end: Timestamp indicating when request transmission ended
    """
    def __init__(self, form_in, method, scheme, host, port, path, httpversion, headers, content,
                 timestamp_start, timestamp_end, form_out=None):
        assert isinstance(headers, ODictCaseless) or not headers
        HTTPMessage.__init__(self)

        self.form_in = form_in
        self.method = method
        self.scheme = scheme
        self.host = host
        self.port = port
        self.path = path
        self.httpversion = httpversion
        self.headers = headers
        self.content = content
        self.timestamp_start = timestamp_start
        self.timestamp_end = timestamp_end
        self.form_out = form_out or form_in

        ## (Attributes below don't get serialized)

        # Have this request's cookies been modified by sticky cookies or auth?
        self.stickycookie = False
        self.stickyauth = False
        # Is this request replayed?
        self.is_replay = False

    # Attributes that make up the serialized state, with the class or type
    # used to restore each of them.
    _stateobject_attributes = dict(
        form_in=str,
        method=str,
        scheme=str,
        host=str,
        port=int,
        path=str,
        httpversion=tuple,
        headers=ODictCaseless,
        content=str,
        timestamp_start=float,
        timestamp_end=float,
        form_out=str
    )

    @classmethod
    def _from_state(cls, state):
        """
        Build a request from a serialized state dict.
        """
        f = cls(None, None, None, None, None, None, None, None, None, None, None)
        f._load_state(state)
        return f

    @classmethod
    def from_stream(cls, rfile, include_content=True, body_size_limit=None):
        """
        Parse an HTTP request from a file stream.

        rfile: Stream to read from. Its timestamps are reset before the
               request line is read.
        include_content: If true, the body is read as well and
               timestamp_end is set; otherwise content and timestamp_end
               stay None.
        body_size_limit: Passed on to http.read_http_body.

        Raises http.HttpError(400) for a malformed request line or invalid
        headers. Returns an instance of cls.
        """
        # These are only filled in for some request forms, or when the body
        # is read.
        scheme = host = port = content = timestamp_end = None

        rfile.reset_timestamps()
        request_line = get_line(rfile)
        timestamp_start = rfile.first_byte_timestamp

        request_line_parts = http.parse_init(request_line)
        if not request_line_parts:
            raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
        method, path, httpversion = request_line_parts

        if path == '*':
            form_in = "asterisk"
        elif path.startswith("/"):
            form_in = "origin"
            if not netlib.utils.isascii(path):
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
        elif method.upper() == 'CONNECT':
            form_in = "authority"
            r = http.parse_init_connect(request_line)
            if not r:
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
            host, port, _ = r
            path = None
        else:
            form_in = "absolute"
            r = http.parse_init_proxy(request_line)
            if not r:
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
            _, scheme, host, port, path, _ = r

        headers = http.read_headers(rfile)
        if headers is None:
            raise http.HttpError(400, "Invalid headers")

        if include_content:
            content = http.read_http_body(rfile, headers, body_size_limit, True)
            timestamp_end = utils.timestamp()

        # Construct through cls so that subclasses get instances of their
        # own type from this alternate constructor.
        return cls(form_in, method, scheme, host, port, path, httpversion, headers, content,
                   timestamp_start, timestamp_end)

    def _assemble_first_line(self, form=None):
        """
        Return the request line for the given request form. form defaults
        to self.form_out. Raises http.HttpError(400) for an unknown form.
        """
        form = form or self.form_out

        if form in ("asterisk", "origin"):
            request_line = '%s %s HTTP/%s.%s' % (self.method, self.path, self.httpversion[0], self.httpversion[1])
        elif form == "authority":
            request_line = '%s %s:%s HTTP/%s.%s' % (self.method, self.host, self.port,
                                                    self.httpversion[0], self.httpversion[1])
        elif form == "absolute":
            request_line = '%s %s://%s:%s%s HTTP/%s.%s' % \
                           (self.method, self.scheme, self.host, self.port, self.path,
                            self.httpversion[0], self.httpversion[1])
        else:
            raise http.HttpError(400, "Invalid request form")
        return request_line

    def _assemble_headers(self):
        """
        Return the headers as a string, working on a copy: connection
        related headers are dropped, a Host header is added if missing and
        Content-Length is set to match the content.
        """
        headers = self.headers.copy()
        utils.del_all(
            headers,
            [
                'Proxy-Connection',
                'Keep-Alive',
                'Connection',
                'Transfer-Encoding'
            ]
        )
        if 'host' not in headers:
            headers["Host"] = [utils.hostport(self.scheme, self.host, self.port)]

        if self.content:
            headers["Content-Length"] = [str(len(self.content))]
        elif 'Transfer-Encoding' in self.headers:
            # content-length for e.g. chunked transfer-encoding with no content
            headers["Content-Length"] = ["0"]

        return str(headers)

    def _assemble_head(self, form=None):
        """
        Return the request line plus headers, terminated by an empty line.
        """
        return "%s\r\n%s\r\n" % (self._assemble_first_line(form), self._assemble_headers())

    def _assemble(self, form=None):
        """
        Assembles the request for transmission to the server. We make some
        modifications to make sure interception works properly.

        Raises an Exception if the request cannot be assembled.
        """
        if self.content == CONTENT_MISSING:
            raise Exception("CONTENT_MISSING")  # FIXME correct exception class
        head = self._assemble_head(form)
        if self.content:
            return head + self.content
        else:
            return head

    def __hash__(self):
        return id(self)

    def anticache(self):
        """
        Modifies this request to remove headers that might produce a cached
        response. That is, we remove ETags and If-Modified-Since headers.
        """
        delheaders = [
            "if-modified-since",
            "if-none-match",
        ]
        for i in delheaders:
            del self.headers[i]

    def anticomp(self):
        """
        Modifies this request to remove headers that will compress the
        resource's data.
        """
        self.headers["accept-encoding"] = ["identity"]

    def constrain_encoding(self):
        """
        Limits the permissible Accept-Encoding values, based on what we can
        decode appropriately.
        """
        if self.headers["accept-encoding"]:
            self.headers["accept-encoding"] = [', '.join(
                e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
            )]

    def get_form_urlencoded(self):
        """
        Retrieves the URL-encoded form data, returning an ODict object.
        Returns an empty ODict if there is no data or the content-type
        indicates non-form data.
        """
        if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
            return ODict(utils.urldecode(self.content))
        return ODict([])

    def set_form_urlencoded(self, odict):
        """
        Sets the body to the URL-encoded form data, and adds the
        appropriate content-type header. Note that this will destroy the
        existing body if there is one.
        """
        # FIXME: If there's an existing content-type header indicating a
        # url-encoded form, leave it alone.
        self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
        self.content = utils.urlencode(odict.lst)

    def get_path_components(self):
        """
        Returns the path components of the URL as a list of strings.

        Components are unquoted.
        """
        _, _, path, _, _, _ = urlparse.urlparse(self.get_url())
        return [urllib.unquote(i) for i in path.split("/") if i]

    def set_path_components(self, lst):
        """
        Takes a list of strings, and sets the path component of the URL.

        Components are quoted.
        """
        lst = [urllib.quote(i, safe="") for i in lst]
        path = "/" + "/".join(lst)
        scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url())
        self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))

    def get_query(self):
        """
        Gets the request query string. Returns an ODict object.
        """
        _, _, _, _, query, _ = urlparse.urlparse(self.get_url())
        if query:
            return ODict(utils.urldecode(query))
        return ODict([])

    def set_query(self, odict):
        """
        Takes an ODict object, and sets the request query string.
        """
        scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url())
        query = utils.urlencode(odict.lst)
        self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))

    def get_url(self, hostheader=False):
        """
        Returns a URL string, constructed from the Request's URL components.

        If hostheader is True, we use the value specified in the request
        Host header to construct the URL.
        """
        if hostheader:
            host = self.headers.get_first("host") or self.host
        else:
            host = self.host
        host = host.encode("idna")
        return utils.unparse_url(self.scheme, host, self.port, self.path).encode('ascii')

    def set_url(self, url):
        """
        Parses a URL specification, and updates the Request's information
        accordingly.

        Returns False if the URL was invalid, True if the request succeeded.
        """
        parts = http.parse_url(url)
        if not parts:
            return False
        self.scheme, self.host, self.port, self.path = parts
        return True

    def get_cookies(self):
        """
        Return the cookies sent with this request as a dict that maps each
        cookie name to a (value, {}) tuple, or None if the request has no
        Cookie header.

        Whitespace around names and values is stripped, and segments
        without a name (e.g. after a trailing ";") are ignored.
        """
        cookie_headers = self.headers.get("cookie")
        if not cookie_headers:
            return None

        cookies = []
        for header in cookie_headers:
            for pair in header.split(';'):
                name, _, value = pair.partition("=")
                # Pairs are conventionally separated by "; ", so the raw
                # name carries a leading space for every cookie but the
                # first one.
                name = name.strip()
                if not name:
                    continue
                cookies.append((name, (value.strip(), {})))
        return dict(cookies)

    def replace(self, pattern, repl, *args, **kwargs):
        """
        Replaces a regular expression pattern with repl in the headers, the
        request path and the body of the request. Encoded content will be
        decoded before replacement, and re-encoded afterwards.

        Returns the number of replacements made.
        """
        c = HTTPMessage.replace(self, pattern, repl, *args, **kwargs)
        self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
        c += pc
        return c
class HTTPResponse(HTTPMessage):
"""
An HTTP response.
Exposes the following attributes:
flow: Flow object the response belongs to
code: HTTP response code
msg: HTTP response message
headers: ODictCaseless object
content: Content of the response, None, or CONTENT_MISSING if there
is content associated, but not present. CONTENT_MISSING evaluates
to False to make checking for the presence of content natural.
httpversion: HTTP version tuple
timestamp_start: Timestamp indicating when response transmission started
timestamp_end: Timestamp indicating when response transmission ended
"""
def __init__(self, httpversion, code, msg, headers, content, timestamp_start, timestamp_end):
    """
    Store the parts of an HTTP response. headers must be an ODictCaseless
    instance.
    """
    assert isinstance(headers, ODictCaseless)
    HTTPMessage.__init__(self)

    self.httpversion, self.code, self.msg = httpversion, code, msg
    self.headers, self.content = headers, content
    self.timestamp_start, self.timestamp_end = timestamp_start, timestamp_end

    ## (Attributes below don't get serialized)

    # Is this response replayed?
    self.is_replay = False
# Attributes that make up the serialized state of a response, mapped to the
# class or type used to restore each of them.
_stateobject_attributes = dict(
httpversion=tuple,
code=int,
msg=str,
headers=ODictCaseless,
content=str,
timestamp_start=float,
timestamp_end=float
)
@classmethod
def _from_state(cls, state):
    """
    Build a response from a serialized state dict.
    """
    # __init__ takes exactly seven arguments and asserts that headers is an
    # ODictCaseless, so start from an empty header container instead of
    # None. _load_state then fills in the real values.
    f = cls(None, None, None, ODictCaseless([]), None, None, None)
    f._load_state(state)
    return f
@classmethod
def from_stream(cls, rfile, request_method, include_content=True, body_size_limit=None):
    """
    Parse an HTTP response from a file stream.

    rfile: Stream to read from. Its timestamps are reset before reading.
    request_method: Passed on to http.read_response together with
                    body_size_limit.
    include_content: Must be true; reading a response without its content
                     raises NotImplementedError.

    Returns an instance of cls.
    """
    if not include_content:
        raise NotImplementedError

    rfile.reset_timestamps()
    httpversion, code, msg, headers, content = http.read_response(
        rfile,
        request_method,
        body_size_limit)
    timestamp_start = rfile.first_byte_timestamp
    timestamp_end = utils.timestamp()
    # Construct through cls so that subclasses get instances of their own
    # type from this alternate constructor.
    return cls(httpversion, code, msg, headers, content, timestamp_start, timestamp_end)
def _assemble_first_line(self):
return 'HTTP/%s.%s %s %s' % (self.httpversion[0], self.httpversion[1], self.code, self.msg)
def _assemble_headers(self):
headers = self.headers.copy()
utils.del_all(
headers,
[
'Proxy-Connection',
'Transfer-Encoding'
]
)
if self.content:
headers["Content-Length"] = [str(len(self.content))]
elif 'Transfer-Encoding' in self.headers: # add content-length for chuncked transfer-encoding with no content
headers["Content-Length"] = ["0"]
return str(headers)
def _assemble_head(self):
return '%s\r\n%s\r\n' % (self._assemble_first_line(), self._assemble_headers())
def _assemble(self):
"""
Assembles the response for transmission to the client. We make some
modifications to make sure interception works properly.
Raises an Exception if the request cannot be assembled.
"""
if self.content == CONTENT_MISSING:
raise Exception("CONTENT_MISSING") # FIXME correct exception class
head = self._assemble_head()
if self.content:
return head + self.content
else:
return head
def _refresh_cookie(self, c, delta):
"""
Takes a cookie string c and a time delta in seconds, and returns
a refreshed cookie string.
"""
c = Cookie.SimpleCookie(str(c))
for i in c.values():
if "expires" in i:
d = parsedate_tz(i["expires"])
if d:
d = mktime_tz(d) + delta
i["expires"] = formatdate(d)
else:
# This can happen when the expires tag is invalid.
# reddit.com sends a an expires tag like this: "Thu, 31 Dec
# 2037 23:59:59 GMT", which is valid RFC 1123, but not
# strictly correct according tot he cookie spec. Browsers
# appear to parse this tolerantly - maybe we should too.
# For now, we just ignore this.
del i["expires"]
return c.output(header="").strip()
def refresh(self, now=None):
"""
This fairly complex and heuristic function refreshes a server
response for replay.
- It adjusts date, expires and last-modified headers.
- It adjusts cookie expiration.
"""
if not now:
now = time.time()
delta = now - self.timestamp_start
refresh_headers = [
"date",
"expires",
"last-modified",
]
for i in refresh_headers:
if i in self.headers:
d = parsedate_tz(self.headers[i][0])
if d:
new = mktime_tz(d) + delta
self.headers[i] = [formatdate(new)]
c = []
for i in self.headers["set-cookie"]:
c.append(self._refresh_cookie(i, delta))
if c:
self.headers["set-cookie"] = c
def get_cookies(self):
cookie_headers = self.headers.get("set-cookie")
if not cookie_headers:
return None
cookies = []
for header in cookie_headers:
pairs = [pair.partition("=") for pair in header.split(';')]
cookie_name = pairs[0][0] # the key of the first key/value pairs
cookie_value = pairs[0][2] # the value of the first key/value pairs
cookie_parameters = {key.strip().lower(): value.strip() for key, sep, value in pairs[1:]}
cookies.append((cookie_name, (cookie_value, cookie_parameters)))
return dict(cookies)
class HTTPFlow(Flow):
    """
    A Flow is a collection of objects representing a single HTTP
    transaction. The main attributes are:

        request: HTTPRequest object
        response: HTTPResponse object
        error: Error object

    Note that it's possible for a Flow to have both a response and an error
    object. This might happen, for instance, when a response was received
    from the server, but there was an error sending it back to the client.

    The following additional attributes are exposed:

        intercepting: Is this flow currently being intercepted?
    """
    # Extend the attribute lists inherited from Flow with the two HTTP
    # message attributes.
    _backrefattr = Flow._backrefattr + ("request", "response")

    # Work on a copy so that the base class' dict is not modified.
    _stateobject_attributes = Flow._stateobject_attributes.copy()
    _stateobject_attributes.update(
        request=HTTPRequest,
        response=HTTPResponse
    )

    def __init__(self, client_conn, server_conn, error, request, response):
        Flow.__init__(self, "http", client_conn, server_conn, error)
        self.request, self.response = request, response

    @classmethod
    def _from_state(cls, state):
        """
        Build a flow from a serialized state: create an empty placeholder
        instance and let _load_state() fill it in.
        """
        f = cls(None, None, None, None, None)
        f._load_state(state)
        return f

    def copy(self):
        """
        Returns a copy of this flow in which request and response (if
        present) are copies of their own as well.
        """
        f = super(HTTPFlow, self).copy()
        if self.request:
            f.request = self.request.copy()
        if self.response:
            # Copy the response itself - not the request a second time.
            f.response = self.response.copy()
        return f
class HttpAuthenticationError(Exception):
    """
    Signals that proxy authentication is required or has failed.

    auth_headers: the headers to send along with the authentication
    challenge, or None.
    """

    def __init__(self, auth_headers=None):
        self.auth_headers = auth_headers

    def __str__(self):
        # The textual form is deliberately constant; details live in
        # auth_headers.
        return "HttpAuthenticationError"
class HTTPHandler(ProtocolHandler):
    """
    Protocol handler for HTTP connections: reads requests from the client,
    obtains a response (from the master or from the server) and sends it
    back to the client.
    """

    def handle_messages(self):
        """
        Handle flows until handle_flow() reports that the connection should
        not be reused, then mark the connection for closing.
        """
        while self.handle_flow():
            pass
        self.c.close = True

    def get_response_from_server(self, request):
        """
        Send the request to the server and return the parsed HTTPResponse.

        If the server connection turns out to be dead, we reconnect and
        retry exactly once; a second failure is re-raised.
        """
        request_raw = request._assemble()

        for i in range(2):
            try:
                self.c.server_conn.wfile.write(request_raw)
                self.c.server_conn.wfile.flush()
                return HTTPResponse.from_stream(self.c.server_conn.rfile, request.method,
                                                body_size_limit=self.c.config.body_size_limit)
            except (tcp.NetLibDisconnect, http.HttpErrorConnClosed) as v:
                self.c.log("error in server communication: %s" % str(v))
                if i < 1:
                    # In any case, we try to reconnect at least once.
                    # This is necessary because it might be possible that we already initiated an upstream connection
                    # after clientconnect that has already been expired, e.g consider the following event log:
                    # > clientconnect (transparent mode destination known)
                    # > serverconnect
                    # > read n% of large request
                    # > server detects timeout, disconnects
                    # > read (100-n)% of large request
                    # > send large request upstream
                    self.c.server_reconnect()
                else:
                    # Bare raise keeps the original traceback.
                    raise

    def handle_flow(self):
        """
        Handle a single request/response exchange.

        Returns True if another flow may follow on this connection and
        False if the connection should be closed. ConnectionTypeChange
        (raised by ssl_upgrade) is deliberately not caught here.
        """
        flow = HTTPFlow(self.c.client_conn, self.c.server_conn, None, None, None)
        try:
            flow.request = HTTPRequest.from_stream(self.c.client_conn.rfile,
                                                   body_size_limit=self.c.config.body_size_limit)
            self.c.log("request", [flow.request._assemble_first_line(flow.request.form_in)])

            request_reply = self.c.channel.ask("request" if LEGACY else "httprequest",
                                               flow.request if LEGACY else flow)
            if request_reply is None or request_reply == KILL:
                return False

            # The master may answer the request itself by replying with a
            # response object; otherwise we ask the server.
            if isinstance(request_reply, HTTPResponse) or (LEGACY and isinstance(request_reply, libmproxy.flow.Response)):
                flow.response = request_reply
            else:
                self.process_request(flow.request)
                flow.response = self.get_response_from_server(flow.request)

            if LEGACY:
                # Legacy response objects only offer _assemble().
                response_line = flow.response._assemble().splitlines()[0]
            else:
                response_line = flow.response._assemble_first_line()
            self.c.log("response", [response_line])

            response_reply = self.c.channel.ask("response" if LEGACY else "httpresponse",
                                                flow.response if LEGACY else flow)
            if response_reply is None or response_reply == KILL:
                return False

            raw = flow.response._assemble()
            self.c.client_conn.wfile.write(raw)
            self.c.client_conn.wfile.flush()
            flow.timestamp_end = utils.timestamp()

            if (http.connection_close(flow.request.httpversion, flow.request.headers) or
                    http.connection_close(flow.response.httpversion, flow.response.headers)):
                return False

            if flow.request.form_in == "authority":
                self.ssl_upgrade(flow.request)
            return True
        except (HttpAuthenticationError, http.HttpError, ProxyError, tcp.NetLibError) as e:
            self.handle_error(e, flow)
        return False

    def handle_error(self, error, flow=None):
        """
        Log the error, attach it to the flow (if any) and report it to the
        master, then try to send an error response to the client if we
        know a status code for it.
        """
        code, message, headers = None, None, None
        if isinstance(error, HttpAuthenticationError):
            code, message, headers = 407, "Proxy Authentication Required", error.auth_headers
        elif isinstance(error, (http.HttpError, ProxyError)):
            code, message = error.code, error.msg
        elif isinstance(error, tcp.NetLibError):
            code = 502
            message = error.message or error.__class__

        if code:
            err = "%s: %s" % (code, message)
        else:
            err = message

        self.c.log("error: %s" % err)

        if flow:
            flow.error = Error(err)
            self.c.channel.ask("error" if LEGACY else "httperror",
                               flow.error if LEGACY else flow)
        else:
            pass  # FIXME: Is there any use case for persisting errors that occur outside of flows?

        if code:
            try:
                self.send_error(code, message, headers)
            except Exception:
                # Best effort only: the client connection may already be
                # gone. KeyboardInterrupt/SystemExit still propagate.
                pass

    def send_error(self, code, message, headers):
        """
        Write a minimal HTML error response to the client.

        code: HTTP status code
        message: text placed into the HTML body
        headers: additional headers to send (anything with .items()), or None
        """
        response = http_status.RESPONSES.get(code, "Unknown")
        # NOTE(review): message is inserted into the HTML without escaping -
        # confirm that this is acceptable for all callers.
        html_content = '<html><head>\n<title>%d %s</title>\n</head>\n<body>\n%s\n</body>\n</html>' % \
                       (code, response, message)
        self.c.client_conn.wfile.write("HTTP/1.1 %s %s\r\n" % (code, response))
        self.c.client_conn.wfile.write("Server: %s\r\n" % self.c.server_version)
        self.c.client_conn.wfile.write("Content-type: text/html\r\n")
        self.c.client_conn.wfile.write("Content-Length: %d\r\n" % len(html_content))
        if headers:
            for key, value in headers.items():
                self.c.client_conn.wfile.write("%s: %s\r\n" % (key, value))
        self.c.client_conn.wfile.write("Connection: close\r\n")
        self.c.client_conn.wfile.write("\r\n")
        self.c.client_conn.wfile.write(html_content)
        self.c.client_conn.wfile.flush()

    def ssl_upgrade(self, upstream_request=None):
        """
        Upgrade the connection to SSL after an authority (CONNECT) request has been made.
        If the authority request has been forwarded upstream (because we have another proxy server there),
        monkey-patch the ConnectionHandler.server_reconnect function to resend the request on reconnect.

        This isn't particularly beautiful code, but it isolates this rare edge-case from the
        protocol-agnostic ConnectionHandler.

        Always raises ConnectionTypeChange.
        """
        self.c.mode = "transparent"
        self.c.determine_conntype()
        self.c.establish_ssl(server=True, client=True)

        if upstream_request:
            self.c.log("Hook reconnect function")
            original_reconnect_func = self.c.server_reconnect

            def reconnect_http_proxy():
                self.c.log("Hooked reconnect function")
                self.c.log("Hook: Run original redirect")
                original_reconnect_func(no_ssl=True)
                self.c.log("Hook: Write CONNECT request to upstream proxy", [upstream_request._assemble_first_line()])
                self.c.server_conn.wfile.write(upstream_request._assemble())
                self.c.server_conn.wfile.flush()
                self.c.log("Hook: Read answer to CONNECT request from proxy")
                resp = HTTPResponse.from_stream(self.c.server_conn.rfile, upstream_request.method)
                if resp.code != 200:
                    raise ProxyError(resp.code,
                                     "Cannot reestablish SSL connection with upstream proxy: \r\n" + str(resp.headers))
                self.c.log("Hook: Establish SSL with upstream proxy")
                self.c.establish_ssl(server=True)

            self.c.server_reconnect = reconnect_http_proxy

        raise ConnectionTypeChange

    def process_request(self, request):
        """
        Prepare the connection for forwarding the request: authenticate in
        regular mode, answer CONNECT requests addressed directly to us, and
        make sure we are connected to the right server.

        Raises http.HttpError for invalid requests, HttpAuthenticationError
        if authentication fails and ConnectionTypeChange (via ssl_upgrade)
        after answering a CONNECT request.
        """
        if self.c.mode == "regular":
            self.authenticate(request)
        if request.form_in == "authority" and self.c.client_conn.ssl_established:
            raise http.HttpError(502, "Must not CONNECT on already encrypted connection")

        # If we have a CONNECT request, we might need to intercept
        if request.form_in == "authority":
            directly_addressed_at_mitmproxy = (self.c.mode == "regular") and not self.c.config.forward_proxy
            if directly_addressed_at_mitmproxy:
                self.c.establish_server_connection((request.host, request.port))
                self.c.client_conn.wfile.write(
                    'HTTP/1.1 200 Connection established\r\n' +
                    ('Proxy-agent: %s\r\n' % self.c.server_version) +
                    '\r\n'
                )
                self.c.client_conn.wfile.flush()
                self.ssl_upgrade()  # raises ConnectionTypeChange exception

        if self.c.mode == "regular":
            if request.form_in == "authority":
                pass
            elif request.form_in == "absolute":
                if request.scheme != "http":
                    raise http.HttpError(400, "Invalid Request")
                if not self.c.config.forward_proxy:
                    request.form_out = "origin"
                    # (Re)connect if there is no server connection yet or
                    # it points at a different address than the request.
                    if ((not self.c.server_conn) or
                            (self.c.server_conn.address != (request.host, request.port))):
                        self.c.establish_server_connection((request.host, request.port))
            else:
                raise http.HttpError(400, "Invalid Request")

    def authenticate(self, request):
        """
        Check the request against the configured authenticator (if any).

        On success the authenticator's clean() is applied to the request
        headers; on failure HttpAuthenticationError is raised with the
        challenge headers. Returns the request headers.
        """
        if self.c.config.authenticator:
            if self.c.config.authenticator.authenticate(request.headers):
                self.c.config.authenticator.clean(request.headers)
            else:
                raise HttpAuthenticationError(self.c.config.authenticator.auth_challenge_headers())
        return request.headers

View File

@ -7,44 +7,6 @@ from proxy import ProxyError, KILL
LEGACY = True
def _handle(msg, conntype, connection_handler, *args, **kwargs):
handler = None
if conntype == "http":
handler = HTTPHandler(connection_handler)
else:
raise NotImplementedError
f = getattr(handler, "handle_" + msg)
return f(*args, **kwargs)
def handle_messages(conntype, connection_handler):
    """
    Run handle_messages() of the protocol handler for conntype.
    The handler's return value is discarded.
    """
    _handle("messages", conntype, connection_handler)
def handle_error(conntype, connection_handler, error):
    """
    Pass error to handle_error() of the protocol handler for conntype.
    The handler's return value is discarded.
    """
    _handle("error", conntype, connection_handler, error)
class ConnectionTypeChange(Exception):
    """Exception type without any behaviour of its own."""
class ProtocolHandler(object):
    """
    Base class for protocol handlers. Subclasses have to implement
    handle_messages() and handle_error().
    """

    def __init__(self, c):
        # c is the object passed in by the creator of the handler; it is
        # only stored here.
        self.c = c

    def handle_messages(self):
        """
        This method gets called if the connection has been established.
        """
        raise NotImplementedError()

    def handle_error(self, error):
        """
        This method gets called should there be an uncaught exception during the connection.
        This might happen outside of handle_messages, e.g. if the initial SSL handshake fails in transparent mode.
        """
        raise NotImplementedError()
"""
Minimalistic cleanroom reimplementation of a couple of flow.* classes. Most functionality is missing,
@ -202,6 +164,7 @@ class HTTPRequest(HTTPMessage):
if not r:
raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
host, port, _ = r
path = None
else:
form_in = "absolute"
r = http.parse_init_proxy(request_line)

View File

@ -5,7 +5,6 @@ import utils, flow, version, platform, controller
TRANSPARENT_SSL_PORTS = [443, 8443]
KILL = 0
class ProxyError(Exception):
@ -15,10 +14,6 @@ class ProxyError(Exception):
def __str__(self):
return "ProxyError(%s, %s)" % (self.code, self.msg)
import protocol
class Log:
def __init__(self, msg):
self.msg = msg
@ -39,7 +34,7 @@ class ProxyConfig:
self.certstore = certutils.CertStore()
class ClientConnection(tcp.BaseHandler):
class ClientConnection(tcp.BaseHandler, flow.SimpleStateObject):
def __init__(self, client_connection, address, server):
tcp.BaseHandler.__init__(self, client_connection, address, server)
@ -47,6 +42,13 @@ class ClientConnection(tcp.BaseHandler):
self.timestamp_end = None
self.timestamp_ssl_setup = None
_stateobject_attributes = dict(
timestamp_start=float,
timestamp_end=float,
timestamp_ssl_setup=float,
# FIXME: Add missing attributes
)
def convert_to_ssl(self, *args, **kwargs):
tcp.BaseHandler.convert_to_ssl(self, *args, **kwargs)
self.timestamp_ssl_setup = utils.timestamp()
@ -56,7 +58,7 @@ class ClientConnection(tcp.BaseHandler):
self.timestamp_end = utils.timestamp()
class ServerConnection(tcp.TCPClient):
class ServerConnection(tcp.TCPClient, flow.SimpleStateObject):
def __init__(self, address):
tcp.TCPClient.__init__(self, address)
@ -66,6 +68,15 @@ class ServerConnection(tcp.TCPClient):
self.timestamp_tcp_setup = None
self.timestamp_ssl_setup = None
_stateobject_attributes = dict(
peername=tuple,
timestamp_start=float,
timestamp_end=float,
timestamp_tcp_setup=float,
timestamp_ssl_setup=float,
# FIXME: Add missing attributes
)
def connect(self):
self.timestamp_start = utils.timestamp()
tcp.TCPClient.connect(self)
@ -119,6 +130,8 @@ class RequestReplayThread(threading.Thread):
"""
import protocol
class ConnectionHandler:
def __init__(self, config, client_connection, client_address, server, channel, server_version):
self.config = config
@ -180,6 +193,8 @@ class ConnectionHandler:
protocol.handle_error(self.conntype, self, e)
except Exception, e:
self.log(e.__class__)
import traceback
self.log(traceback.format_exc())
self.log(str(e))
self.del_server_connection()