import Cookie
import urllib
import urlparse
import time
import copy
from email.utils import parsedate_tz, formatdate, mktime_tz
import netlib.utils
from netlib import http, tcp, http_status, stateobject, odict
from netlib.odict import ODict, ODictCaseless
from . import ProtocolHandler, ConnectionTypeChange, KILL
from .. import encoding, utils, version, filt, controller
from ..proxy import ProxyError, ClientConnection, ServerConnection

HDR_FORM_URLENCODED = "application/x-www-form-urlencoded"
# Falsy marker for "there is content, but we do not have it". It is compared
# with == against message content before assembling.
CONTENT_MISSING = 0

# Selects the channel message names/payloads used by HTTPHandler
# ("request"/"response"/"error" with the bare message when True).
LEGACY = True


def get_line(fp):
    """
    Read one line from fp, skipping a single leading blank line.

    A blank line ("\\r\\n" or "\\n") may be left over from the previous
    message, in which case the following line is returned instead.

    Raises tcp.NetLibDisconnect if the stream yields an empty string.
    """
    line = fp.readline()
    if line in ("\r\n", "\n"):  # Possible leftover from previous message
        line = fp.readline()
    if line == "":
        raise tcp.NetLibDisconnect
    return line


class decoded(object):
    """
    A context manager that decodes a request or response, and then
    re-encodes it with the same encoding after execution of the block.

    The Content-Encoding header is inspected once, on construction; if it is
    not one of encoding.ENCODINGS the context manager does nothing.

    Example:

        with decoded(request):
            request.content = request.content.replace("foo", "bar")
    """
    def __init__(self, o):
        self.o = o
        ce = o.headers.get_first("content-encoding")
        # Remember the encoding only if we know how to handle it.
        self.ce = ce if ce in encoding.ENCODINGS else None

    def __enter__(self):
        if self.ce:
            self.o.decode()

    def __exit__(self, type, value, tb):
        # Re-encode unconditionally (also when the block raised); returning
        # None lets any exception propagate.
        if self.ce:
            self.o.encode(self.ce)


# FIXME: Move out of http
class BackreferenceMixin(object):
    """
    If an attribute from the _backrefattr tuple is set, this mixin sets a
    reference back on the attribute object.

    The name of the back-reference attribute is taken from _backrefname,
    which the class using the mixin has to define.

    Example:
        e = Error()
        f = Flow()
        f.error = e
        assert f is e.flow
    """
    _backrefattr = tuple()

    def __setattr__(self, key, value):
        super(BackreferenceMixin, self).__setattr__(key, value)
        # None values cannot carry a back-reference, so they are skipped.
        if key in self._backrefattr and value is not None:
            setattr(value, self._backrefname, self)


# FIXME: Move out of http
class Error(stateobject.SimpleStateObject):
    """
    An Error.

    This is distinct from an HTTP error response (say, a code 500), which
    is represented by a normal Response object. This class is responsible
    for indicating errors that fall outside of normal HTTP communications,
    like interrupted connections, timeouts, protocol errors.

    Exposes the following attributes:

        flow: Flow object
        msg: Message describing the error
        timestamp: Seconds since the epoch
    """
    def __init__(self, msg, timestamp=None):
        self.msg = msg
        # A falsy timestamp (None or 0) is replaced by utils.timestamp().
        self.timestamp = timestamp or utils.timestamp()

    _stateobject_attributes = dict(
        msg=str,
        timestamp=float
    )

    def copy(self):
        """Return a shallow copy of this Error."""
        return copy.copy(self)


# FIXME: Move out of http
class Flow(stateobject.SimpleStateObject, BackreferenceMixin):
    """
    Base class for a flow: a connection type, the client and server
    connections, and an optional Error. Assigning `error` sets `error.flow`
    via BackreferenceMixin.
    """
    def __init__(self, conntype, client_conn, server_conn, error):
        self.conntype = conntype
        self.client_conn = client_conn
        self.server_conn = server_conn
        self.error = error
        # modified()/backup()/revert() read this attribute, so make sure it
        # exists even if a subclass does not set it.
        self._backup = None

    _backrefattr = ("error",)
    _backrefname = "flow"

    _stateobject_attributes = dict(
        error=Error,
        client_conn=ClientConnection,
        server_conn=ServerConnection,
        conntype=str
    )

    def _get_state(self):
        """Return the serialized state, tagged with version.IVERSION."""
        d = super(Flow, self)._get_state()
        d.update(version=version.IVERSION)
        return d

    @classmethod
    def _from_state(cls, state):
        """Build an empty Flow and load `state` into it."""
        f = cls(None, None, None, None)
        f._load_state(state)
        return f

    def copy(self):
        """Return a shallow copy of this flow with its Error copied too."""
        f = copy.copy(self)
        if self.error:
            f.error = self.error.copy()
        return f

    def modified(self):
        """
        Has this Flow been modified?

        Returns True if a backup exists and differs from the current state,
        False otherwise (including when no backup was taken).
        """
        if self._backup:
            return self._backup != self._get_state()
        else:
            return False

    def backup(self, force=False):
        """
        Save a backup of this Flow, which can be reverted to using a
        call to .revert(). An existing backup is never overwritten.
        """
        # NOTE(review): `force` is accepted but not used - confirm whether
        # it is meant to overwrite an existing backup.
        if not self._backup:
            self._backup = self._get_state()

    def revert(self):
        """
        Revert to the last backed up state and discard the backup.
        """
        if self._backup:
            self._load_state(self._backup)
            self._backup = None


class HTTPMessage(stateobject.SimpleStateObject):
    """
    Functionality shared by HTTPRequest and HTTPResponse. Subclasses are
    expected to provide `headers` and `content` attributes and to implement
    from_stream and the _assemble* methods.
    """
    def __init__(self):
        self.flow = None  # Will usually be set by the backref mixin

    def get_decoded_content(self):
        """
        Returns the decoded content based on the current Content-Encoding
        header. Doesn't change the message itself or its headers.

        If there is no content, or the encoding is not in
        encoding.ENCODINGS, the content is returned unchanged.
        """
        ce = self.headers.get_first("content-encoding")
        if not self.content or ce not in encoding.ENCODINGS:
            return self.content
        return encoding.decode(ce, self.content)

    def decode(self):
        """
        Decodes content based on the current Content-Encoding header, then
        removes the header. If there is no Content-Encoding header, no
        action is taken.

        Returns True if decoding succeeded, False otherwise.
        """
        ce = self.headers.get_first("content-encoding")
        if not self.content or ce not in encoding.ENCODINGS:
            return False
        data = encoding.decode(ce, self.content)
        # encoding.decode signals failure with None; leave message untouched.
        if data is None:
            return False
        self.content = data
        del self.headers["content-encoding"]
        return True

    def encode(self, e):
        """
        Encodes content with the encoding e, where e is "gzip", "deflate"
        or "identity", and sets the Content-Encoding header accordingly.
        """
        # FIXME: Error if there's an existing encoding header?
        self.content = encoding.encode(e, self.content)
        self.headers["content-encoding"] = [e]

    def size(self, **kwargs):
        """
        Size in bytes of a fully rendered message, including headers and
        HTTP lead-in. Keyword arguments are passed to _assemble_head.
        """
        hl = len(self._assemble_head(**kwargs))
        if self.content:
            return hl + len(self.content)
        else:
            return hl

    def copy(self):
        """Return a shallow copy of the message with its own headers copy."""
        c = copy.copy(self)
        c.headers = self.headers.copy()
        return c

    def replace(self, pattern, repl, *args, **kwargs):
        """
        Replaces a regular expression pattern with repl in both the headers
        and the body of the message. Encoded content will be decoded
        before replacement, and re-encoded afterwards.

        Returns the number of replacements made.
        """
        with decoded(self):
            self.content, c = utils.safe_subn(pattern, repl, self.content, *args, **kwargs)
        c += self.headers.replace(pattern, repl, *args, **kwargs)
        return c

    @classmethod
    def from_stream(cls, rfile, include_content=True, body_size_limit=None):
        """
        Parse an HTTP message from a file stream
        """
        raise NotImplementedError  # pragma: nocover

    def _assemble_first_line(self):
        """
        Returns the assembled request/response line
        """
        raise NotImplementedError  # pragma: nocover

    def _assemble_headers(self):
        """
        Returns the assembled headers
        """
        raise NotImplementedError  # pragma: nocover

    def _assemble_head(self):
        """
        Returns the assembled request/response line plus headers
        """
        raise NotImplementedError  # pragma: nocover

    def _assemble(self):
        """
        Returns the assembled request/response
        """
        raise NotImplementedError  # pragma: nocover


class HTTPRequest(HTTPMessage):
    """
    An HTTP request.

    Exposes the following attributes:

        flow: Flow object the request belongs to

        headers: ODictCaseless object

        content: Content of the request, None, or CONTENT_MISSING if there
        is content associated, but not present. CONTENT_MISSING evaluates
        to False to make checking for the presence of content natural.

        form_in: The request form which mitmproxy has received. The
        following values are possible:
            - origin (GET /index.html)
            - absolute (GET http://example.com:80/index.html)
            - authority-form (CONNECT example.com:443)
            - asterisk-form (OPTIONS *)
            Details: http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-25#section-5.3

        form_out: The request form which mitmproxy has sent out to the
        destination

        method: HTTP method

        scheme: URL scheme (http/https) (absolute-form only)

        host: Host portion of the URL (absolute-form and authority-form only)

        port: Destination port (absolute-form and authority-form only)

        path: Path portion of the URL (not present in authority-form)

        httpversion: HTTP version tuple

        timestamp_start: Timestamp indicating when request transmission started

        timestamp_end: Timestamp indicating when request transmission ended
    """
    def __init__(self, form_in, method, scheme, host, port, path, httpversion, headers, content,
                 timestamp_start, timestamp_end, form_out=None):
        assert isinstance(headers, ODictCaseless) or not headers
        HTTPMessage.__init__(self)

        self.form_in = form_in
        self.method = method
        self.scheme = scheme
        self.host = host
        self.port = port
        self.path = path
        self.httpversion = httpversion
        self.headers = headers
        self.content = content
        self.timestamp_start = timestamp_start
        self.timestamp_end = timestamp_end
        # By default the request is sent out in the form it was received in.
        self.form_out = form_out or form_in

        # Have this request's cookies been modified by sticky cookies or auth?
        self.stickycookie = False
        self.stickyauth = False
        # Is this request replayed?
        self.is_replay = False

    _stateobject_attributes = dict(
        form_in=str,
        method=str,
        scheme=str,
        host=str,
        port=int,
        path=str,
        httpversion=tuple,
        headers=ODictCaseless,
        content=str,
        timestamp_start=float,
        timestamp_end=float,
        form_out=str
    )

    @classmethod
    def _from_state(cls, state):
        """Build an empty request and load `state` into it."""
        f = cls(None, None, None, None, None, None, None, None, None, None, None)
        f._load_state(state)
        return f

    @classmethod
    def from_stream(cls, rfile, include_content=True, body_size_limit=None):
        """
        Parse an HTTP request from a file stream.

        The request form (asterisk, origin, authority or absolute) is
        derived from the request line. If include_content is False, the
        body is not read and both content and timestamp_end stay None.

        Raises http.HttpError(400) on a malformed request line or headers.
        """
        httpversion, host, port, scheme, method, path, headers, content, timestamp_start, timestamp_end \
            = None, None, None, None, None, None, None, None, None, None

        rfile.reset_timestamps()
        request_line = get_line(rfile)
        timestamp_start = rfile.first_byte_timestamp

        request_line_parts = http.parse_init(request_line)
        if not request_line_parts:
            raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
        method, path, httpversion = request_line_parts

        if path == '*':
            form_in = "asterisk"
        elif path.startswith("/"):
            form_in = "origin"
            if not netlib.utils.isascii(path):
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
        elif method.upper() == 'CONNECT':
            form_in = "authority"
            r = http.parse_init_connect(request_line)
            if not r:
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
            host, port, _ = r
            path = None
        else:
            form_in = "absolute"
            r = http.parse_init_proxy(request_line)
            if not r:
                raise http.HttpError(400, "Bad HTTP request line: %s" % repr(request_line))
            _, scheme, host, port, path, _ = r

        headers = http.read_headers(rfile)
        if headers is None:
            raise http.HttpError(400, "Invalid headers")

        if include_content:
            content = http.read_http_body(rfile, headers, body_size_limit, True)
            timestamp_end = utils.timestamp()

        return HTTPRequest(form_in, method, scheme, host, port, path, httpversion, headers, content,
                           timestamp_start, timestamp_end)

    def _assemble_first_line(self, form=None):
        """
        Return the request line for the given form (defaults to form_out).

        Raises http.HttpError(400) for an unknown form.
        """
        form = form or self.form_out

        if form in ("asterisk", "origin"):
            request_line = '%s %s HTTP/%s.%s' % (self.method, self.path, self.httpversion[0], self.httpversion[1])
        elif form == "authority":
            request_line = '%s %s:%s HTTP/%s.%s' % (self.method, self.host, self.port,
                                                    self.httpversion[0], self.httpversion[1])
        elif form == "absolute":
            request_line = '%s %s://%s:%s%s HTTP/%s.%s' % \
                           (self.method, self.scheme, self.host, self.port, self.path,
                            self.httpversion[0], self.httpversion[1])
        else:
            raise http.HttpError(400, "Invalid request form")
        return request_line

    def _assemble_headers(self):
        """
        Return the headers as a string, with connection-related headers
        removed, a Host header ensured and Content-Length recomputed.
        """
        headers = self.headers.copy()
        utils.del_all(
            headers,
            [
                'Proxy-Connection',
                'Keep-Alive',
                'Connection',
                'Transfer-Encoding'
            ]
        )
        if 'host' not in headers:
            headers["Host"] = [utils.hostport(self.scheme, self.host, self.port)]

        if self.content:
            headers["Content-Length"] = [str(len(self.content))]
        elif 'Transfer-Encoding' in self.headers:
            # content-length for e.g. chunked transfer-encoding with no content
            headers["Content-Length"] = ["0"]

        return str(headers)

    def _assemble_head(self, form=None):
        """Return the request line plus headers, terminated by a blank line."""
        return "%s\r\n%s\r\n" % (self._assemble_first_line(form), self._assemble_headers())

    def _assemble(self, form=None):
        """
        Assembles the request for transmission to the server. We make some
        modifications to make sure interception works properly.

        Raises an Exception if the request cannot be assembled.
        """
        # NOTE: CONTENT_MISSING is 0, so this compares by equality, not
        # identity; None and "" do not match.
        if self.content == CONTENT_MISSING:
            raise Exception("CONTENT_MISSING")  # FIXME correct exception class
        head = self._assemble_head(form)
        if self.content:
            return head + self.content
        else:
            return head

    def __hash__(self):
        return id(self)

    def anticache(self):
        """
        Modifies this request to remove headers that might produce a cached
        response. That is, we remove ETags and If-Modified-Since headers.
        """
        delheaders = [
            "if-modified-since",
            "if-none-match",
        ]
        for i in delheaders:
            del self.headers[i]

    def anticomp(self):
        """
        Modifies this request to remove headers that will compress the
        resource's data.
        """
        self.headers["accept-encoding"] = ["identity"]

    def constrain_encoding(self):
        """
        Limits the permissible Accept-Encoding values, based on what we can
        decode appropriately. Only the first Accept-Encoding header value is
        considered.
        """
        if self.headers["accept-encoding"]:
            self.headers["accept-encoding"] = [', '.join(
                e for e in encoding.ENCODINGS if e in self.headers["accept-encoding"][0]
            )]

    def get_form_urlencoded(self):
        """
        Retrieves the URL-encoded form data, returning an ODict object.
        Returns an empty ODict if there is no data or the content-type
        indicates non-form data.
        """
        if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True):
            return ODict(utils.urldecode(self.content))
        return ODict([])

    def set_form_urlencoded(self, odict):
        """
        Sets the body to the URL-encoded form data, and adds the
        appropriate content-type header. Note that this will destroy the
        existing body if there is one.
        """
        # FIXME: If there's an existing content-type header indicating a
        # url-encoded form, leave it alone.
        self.headers["Content-Type"] = [HDR_FORM_URLENCODED]
        self.content = utils.urlencode(odict.lst)

    def get_path_components(self):
        """
        Returns the path components of the URL as a list of strings.

        Components are unquoted.
        """
        _, _, path, _, _, _ = urlparse.urlparse(self.get_url())
        return [urllib.unquote(i) for i in path.split("/") if i]

    def set_path_components(self, lst):
        """
        Takes a list of strings, and sets the path component of the URL.

        Components are quoted.
        """
        lst = [urllib.quote(i, safe="") for i in lst]
        path = "/" + "/".join(lst)
        scheme, netloc, _, params, query, fragment = urlparse.urlparse(self.get_url())
        self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))

    def get_query(self):
        """
        Gets the request query string. Returns an ODict object.
        """
        _, _, _, _, query, _ = urlparse.urlparse(self.get_url())
        if query:
            return ODict(utils.urldecode(query))
        return ODict([])

    def set_query(self, odict):
        """
        Takes an ODict object, and sets the request query string.
        """
        scheme, netloc, path, params, _, fragment = urlparse.urlparse(self.get_url())
        query = utils.urlencode(odict.lst)
        self.set_url(urlparse.urlunparse([scheme, netloc, path, params, query, fragment]))

    def get_url(self, hostheader=False):
        """
        Returns a URL string, constructed from the Request's URL components.

        If hostheader is True, we use the value specified in the request
        Host header to construct the URL (falling back to self.host).
        """
        if hostheader:
            host = self.headers.get_first("host") or self.host
        else:
            host = self.host
        host = host.encode("idna")
        return utils.unparse_url(self.scheme, host, self.port, self.path).encode('ascii')

    def set_url(self, url):
        """
        Parses a URL specification, and updates the Request's information
        accordingly.

        Returns False if the URL was invalid, True if the request succeeded.
        """
        parts = http.parse_url(url)
        if not parts:
            return False
        self.scheme, self.host, self.port, self.path = parts
        return True

    def get_cookies(self):
        """
        Return the request cookies as a dict mapping each cookie name to a
        (value, {}) tuple, or None if there is no Cookie header.

        The empty dict mirrors the parameter dict returned by
        HTTPResponse.get_cookies.
        """
        cookie_headers = self.headers.get("cookie")
        if not cookie_headers:
            return None

        cookies = []
        for header in cookie_headers:
            pairs = [pair.partition("=") for pair in header.split(';')]
            # Pairs are conventionally separated by "; ", so strip the name;
            # otherwise "a=1; b=2" would produce the key " b".
            cookies.extend((pair[0].strip(), (pair[2], {})) for pair in pairs)
        return dict(cookies)

    def replace(self, pattern, repl, *args, **kwargs):
        """
        Replaces a regular expression pattern with repl in the headers, the
        request path and the body of the request. Encoded content will be
        decoded before replacement, and re-encoded afterwards.

        Returns the number of replacements made.
        """
        c = HTTPMessage.replace(self, pattern, repl, *args, **kwargs)
        self.path, pc = utils.safe_subn(pattern, repl, self.path, *args, **kwargs)
        c += pc
        return c


class HTTPResponse(HTTPMessage):
    """
    An HTTP response.

    Exposes the following attributes:

        flow: Flow object the request belongs to

        code: HTTP response code

        msg: HTTP response message

        headers: ODict object

        content: Content of the request, None, or CONTENT_MISSING if there
        is content associated, but not present. CONTENT_MISSING evaluates
        to False to make checking for the presence of content natural.

        httpversion: HTTP version tuple

        timestamp_start: Timestamp indicating when request transmission started

        timestamp_end: Timestamp indicating when request transmission ended
    """
    def __init__(self, httpversion, code, msg, headers, content, timestamp_start, timestamp_end):
        # None is allowed so that _from_state can build an empty instance
        # before loading the serialized state.
        assert isinstance(headers, ODictCaseless) or headers is None
        HTTPMessage.__init__(self)

        self.httpversion = httpversion
        self.code = code
        self.msg = msg
        self.headers = headers
        self.content = content
        self.timestamp_start = timestamp_start
        self.timestamp_end = timestamp_end

        # Is this request replayed?
        self.is_replay = False

    _stateobject_attributes = dict(
        httpversion=tuple,
        code=int,
        msg=str,
        headers=ODictCaseless,
        content=str,
        timestamp_start=float,
        timestamp_end=float
    )

    @classmethod
    def _from_state(cls, state):
        """Build an empty response and load `state` into it."""
        # One None per constructor parameter (seven in total).
        f = cls(None, None, None, None, None, None, None)
        f._load_state(state)
        return f

    @classmethod
    def from_stream(cls, rfile, request_method, include_content=True, body_size_limit=None):
        """
        Parse an HTTP response from a file stream.

        Raises NotImplementedError if include_content is False.
        """
        if not include_content:
            raise NotImplementedError  # pragma: nocover

        rfile.reset_timestamps()
        httpversion, code, msg, headers, content = http.read_response(
            rfile,
            request_method,
            body_size_limit)
        timestamp_start = rfile.first_byte_timestamp
        timestamp_end = utils.timestamp()
        return HTTPResponse(httpversion, code, msg, headers, content, timestamp_start, timestamp_end)

    def _assemble_first_line(self):
        """Return the status line, e.g. 'HTTP/1.1 200 OK'."""
        return 'HTTP/%s.%s %s %s' % (self.httpversion[0], self.httpversion[1], self.code, self.msg)

    def _assemble_headers(self):
        """
        Return the headers as a string, without Proxy-Connection and
        Transfer-Encoding and with Content-Length recomputed.
        """
        headers = self.headers.copy()
        utils.del_all(
            headers,
            [
                'Proxy-Connection',
                'Transfer-Encoding'
            ]
        )
        if self.content:
            headers["Content-Length"] = [str(len(self.content))]
        elif 'Transfer-Encoding' in self.headers:
            # add content-length for chunked transfer-encoding with no content
            headers["Content-Length"] = ["0"]

        return str(headers)

    def _assemble_head(self):
        """Return the status line plus headers, terminated by a blank line."""
        return '%s\r\n%s\r\n' % (self._assemble_first_line(), self._assemble_headers())

    def _assemble(self):
        """
        Assembles the response for transmission to the client. We make some
        modifications to make sure interception works properly.

        Raises an Exception if the response cannot be assembled.
        """
        if self.content == CONTENT_MISSING:
            raise Exception("CONTENT_MISSING")  # FIXME correct exception class
        head = self._assemble_head()
        if self.content:
            return head + self.content
        else:
            return head

    def _refresh_cookie(self, c, delta):
        """
        Takes a cookie string c and a time delta in seconds, and returns
        a refreshed cookie string.
        """
        c = Cookie.SimpleCookie(str(c))
        for i in c.values():
            if "expires" in i:
                d = parsedate_tz(i["expires"])
                if d:
                    d = mktime_tz(d) + delta
                    i["expires"] = formatdate(d)
                else:
                    # This can happen when the expires tag is invalid.
                    # reddit.com sends an expires tag like this: "Thu, 31 Dec
                    # 2037 23:59:59 GMT", which is valid RFC 1123, but not
                    # strictly correct according to the cookie spec. Browsers
                    # appear to parse this tolerantly - maybe we should too.
                    # For now, we just ignore this.
                    del i["expires"]
        return c.output(header="").strip()

    def refresh(self, now=None):
        """
        This fairly complex and heuristic function refreshes a server
        response for replay.

            - It adjusts date, expires and last-modified headers.
            - It adjusts cookie expiration.

        `now` defaults to the current time; all dates are shifted by the
        difference between `now` and timestamp_start.
        """
        if not now:
            now = time.time()
        delta = now - self.timestamp_start
        refresh_headers = [
            "date",
            "expires",
            "last-modified",
        ]
        for i in refresh_headers:
            if i in self.headers:
                d = parsedate_tz(self.headers[i][0])
                # Unparseable dates are left untouched.
                if d:
                    new = mktime_tz(d) + delta
                    self.headers[i] = [formatdate(new)]
        c = [self._refresh_cookie(i, delta) for i in self.headers["set-cookie"]]
        if c:
            self.headers["set-cookie"] = c

    def get_cookies(self):
        """
        Return the response cookies as a dict mapping each cookie name to a
        (value, parameters) tuple, or None if there is no Set-Cookie header.

        Parameter names are stripped and lower-cased, parameter values are
        stripped.
        """
        cookie_headers = self.headers.get("set-cookie")
        if not cookie_headers:
            return None

        cookies = []
        for header in cookie_headers:
            pairs = [pair.partition("=") for pair in header.split(';')]
            cookie_name = pairs[0][0]  # the key of the first key/value pair
            cookie_value = pairs[0][2]  # the value of the first key/value pair
            cookie_parameters = {key.strip().lower(): value.strip() for key, sep, value in pairs[1:]}
            cookies.append((cookie_name, (cookie_value, cookie_parameters)))
        return dict(cookies)


class HTTPFlow(Flow):
    """
    A Flow is a collection of objects representing a single HTTP
    transaction. The main attributes are:

        request: HTTPRequest object
        response: HTTPResponse object
        error: Error object

    Note that it's possible for a Flow to have both a response and an error
    object. This might happen, for instance, when a response was received
    from the server, but there was an error sending it back to the client.

    The following additional attributes are exposed:

        intercepting: Is this flow currently being intercepted?
    """
    def __init__(self, client_conn, server_conn, error, request, response):
        Flow.__init__(self, "http", client_conn, server_conn, error)
        self.request = request
        self.response = response

        self.intercepting = False  # FIXME: Should that rather be an attribute of Flow?
        self._backup = None

    _backrefattr = Flow._backrefattr + ("request", "response")

    _stateobject_attributes = Flow._stateobject_attributes.copy()
    _stateobject_attributes.update(
        request=HTTPRequest,
        response=HTTPResponse
    )

    @classmethod
    def _from_state(cls, state):
        """Build an empty HTTPFlow and load `state` into it."""
        f = cls(None, None, None, None, None)
        f._load_state(state)
        return f

    def copy(self):
        """
        Return a copy of this flow whose error, request and response are
        copies as well.
        """
        f = super(HTTPFlow, self).copy()
        if self.request:
            f.request = self.request.copy()
        if self.response:
            # Copy the response itself (not the request) into the new flow.
            f.response = self.response.copy()
        return f

    def match(self, f):
        """
        Match this flow against a compiled filter expression. Returns True
        if matched, False if not.

        If f is a string, it will be compiled as a filter expression. If
        the expression is invalid, ValueError is raised. A falsy
        (non-string) filter matches everything.
        """
        if isinstance(f, basestring):
            f = filt.parse(f)
            if not f:
                raise ValueError("Invalid filter expression.")
        if f:
            return f(self)
        return True

    def kill(self, master):
        """
        Kill this request.
        """
        self.error = Error("Connection killed")
        self.error.reply = controller.DummyReply()
        if self.request and not self.request.reply.acked:
            self.request.reply(KILL)
        elif self.response and not self.response.reply.acked:
            self.response.reply(KILL)
            master.handle_error(self)
        self.intercepting = False

    def intercept(self):
        """
        Intercept this Flow. Processing will stop until accept_intercept is
        called.
        """
        self.intercepting = True

    def accept_intercept(self):
        """
        Continue with the flow - called after an intercept().
        """
        assert self.intercepting
        if self.request:
            if not self.request.reply.acked:
                self.request.reply()
            elif self.response and not self.response.reply.acked:
                self.response.reply()
            self.intercepting = False

    def replace(self, pattern, repl, *args, **kwargs):
        """
        Replaces a regular expression pattern with repl in both request and
        response of the flow. Encoded content will be decoded before
        replacement, and re-encoded afterwards.

        Returns the number of replacements made.
        """
        c = self.request.replace(pattern, repl, *args, **kwargs)
        if self.response:
            c += self.response.replace(pattern, repl, *args, **kwargs)
        return c


class HttpAuthenticationError(Exception):
    """
    Raised when proxy authentication fails. `auth_headers` holds the
    challenge headers to send back with the 407 response.
    """
    def __init__(self, auth_headers=None):
        self.auth_headers = auth_headers

    def __str__(self):
        return "HttpAuthenticationError"


class HTTPHandler(ProtocolHandler):
    """
    Protocol handler that reads HTTP requests from the client connection,
    forwards them upstream and relays the responses. `self.c` is the
    connection handler this protocol handler operates on.
    """

    def handle_messages(self):
        """Process flows until handle_flow returns False, then mark the connection for closing."""
        while self.handle_flow():
            pass
        self.c.close = True

    def get_response_from_server(self, request):
        """
        Send the assembled request to the server and return the parsed
        HTTPResponse. On a disconnect the server connection is
        re-established once and the request is sent again; a second failure
        is re-raised.
        """
        request_raw = request._assemble()

        for i in range(2):
            try:
                self.c.server_conn.wfile.write(request_raw)
                self.c.server_conn.wfile.flush()
                return HTTPResponse.from_stream(self.c.server_conn.rfile, request.method,
                                                body_size_limit=self.c.config.body_size_limit)
            except (tcp.NetLibDisconnect, http.HttpErrorConnClosed) as v:
                self.c.log("error in server communication: %s" % str(v))
                if i < 1:
                    # In any case, we try to reconnect at least once.
                    # This is necessary because it might be possible that we already initiated an upstream connection
                    # after clientconnect that has already been expired, e.g consider the following event log:
                    # > clientconnect (transparent mode destination known)
                    # > serverconnect
                    # > read n% of large request
                    # > server detects timeout, disconnects
                    # > read (100-n)% of large request
                    # > send large request upstream
                    self.c.server_reconnect()
                else:
                    # Bare raise keeps the original traceback.
                    raise

    def handle_flow(self):
        """
        Handle a single request/response cycle.

        Returns True if another flow may follow on this connection, False
        if the connection should be closed (kill, error, or a
        "Connection: close" on either side).
        """
        flow = HTTPFlow(self.c.client_conn, self.c.server_conn, None, None, None)
        try:
            flow.request = HTTPRequest.from_stream(self.c.client_conn.rfile,
                                                   body_size_limit=self.c.config.body_size_limit)
            self.c.log("request", [flow.request._assemble_first_line(flow.request.form_in)])

            request_reply = self.c.channel.ask("request" if LEGACY else "httprequest",
                                               flow.request if LEGACY else flow)
            if request_reply is None or request_reply == KILL:
                return False

            if isinstance(request_reply, HTTPResponse):
                # The controller answered the request itself.
                flow.response = request_reply
            else:
                self.process_request(flow.request)
                flow.response = self.get_response_from_server(flow.request)

            if LEGACY:
                response_line = flow.response._assemble().splitlines()[0]
            else:
                response_line = flow.response._assemble_first_line()
            self.c.log("response", [response_line])

            response_reply = self.c.channel.ask("response" if LEGACY else "httpresponse",
                                                flow.response if LEGACY else flow)
            if response_reply is None or response_reply == KILL:
                return False

            raw = flow.response._assemble()
            self.c.client_conn.wfile.write(raw)
            self.c.client_conn.wfile.flush()
            flow.timestamp_end = utils.timestamp()

            if (http.connection_close(flow.request.httpversion, flow.request.headers) or
                    http.connection_close(flow.response.httpversion, flow.response.headers)):
                return False

            if flow.request.form_in == "authority":
                # ssl_upgrade always raises ConnectionTypeChange.
                self.ssl_upgrade(flow.request)
            return True
        except (HttpAuthenticationError, http.HttpError, ProxyError, tcp.NetLibError) as e:
            self.handle_error(e, flow)
        return False

    def handle_error(self, error, flow=None):
        """
        Log the error, attach it to the flow (if any), notify the
        controller and, when an HTTP status code applies, try to send an
        error response to the client.
        """
        code, message, headers = None, None, None
        if isinstance(error, HttpAuthenticationError):
            code, message, headers = 407, "Proxy Authentication Required", error.auth_headers
        elif isinstance(error, (http.HttpError, ProxyError)):
            code, message = error.code, error.msg
        elif isinstance(error, tcp.NetLibError):
            code = 502
            message = error.message or error.__class__

        if code:
            err = "%s: %s" % (code, message)
        else:
            err = message

        self.c.log("error: %s" % err)

        if flow:
            flow.error = Error(err)
            self.c.channel.ask("error" if LEGACY else "httperror",
                               flow.error if LEGACY else flow)
        else:
            pass  # FIXME: Is there any use case for persisting errors that occur outside of flows?

        if code:
            try:
                self.send_error(code, message, headers)
            except Exception:
                # Best effort only: the client connection may already be
                # gone, and there is nobody left to report this to.
                pass

    def send_error(self, code, message, headers):
        """
        Write an HTTP/1.1 error response with the given status code,
        message body and optional extra headers to the client.
        """
        response = http_status.RESPONSES.get(code, "Unknown")
        # NOTE(review): the body carries no HTML markup although it is sent
        # as text/html - confirm whether tags were lost from this template.
        html_content = '\n%d %s\n\n\n%s\n\n' % \
                       (code, response, message)
        self.c.client_conn.wfile.write("HTTP/1.1 %s %s\r\n" % (code, response))
        self.c.client_conn.wfile.write("Server: %s\r\n" % self.c.server_version)
        self.c.client_conn.wfile.write("Content-type: text/html\r\n")
        self.c.client_conn.wfile.write("Content-Length: %d\r\n" % len(html_content))
        if headers:
            for key, value in headers.items():
                self.c.client_conn.wfile.write("%s: %s\r\n" % (key, value))
        self.c.client_conn.wfile.write("Connection: close\r\n")
        self.c.client_conn.wfile.write("\r\n")
        self.c.client_conn.wfile.write(html_content)
        self.c.client_conn.wfile.flush()

    def ssl_upgrade(self, upstream_request=None):
        """
        Upgrade the connection to SSL after an authority (CONNECT) request
        has been made.

        If the authority request has been forwarded upstream (because we
        have another proxy server there), monkey-patch the
        ConnectionHandler.server_reconnect function to resend the request
        on reconnect.

        This isn't particularly beautiful code, but it isolates this rare
        edge-case from the protocol-agnostic ConnectionHandler.

        Always raises ConnectionTypeChange.
        """
        self.c.mode = "transparent"
        self.c.determine_conntype()
        self.c.establish_ssl(server=True, client=True)

        if upstream_request:
            self.c.log("Hook reconnect function")
            original_reconnect_func = self.c.server_reconnect

            def reconnect_http_proxy():
                self.c.log("Hooked reconnect function")
                self.c.log("Hook: Run original redirect")
                original_reconnect_func(no_ssl=True)
                self.c.log("Hook: Write CONNECT request to upstream proxy",
                           [upstream_request._assemble_first_line()])
                self.c.server_conn.wfile.write(upstream_request._assemble())
                self.c.server_conn.wfile.flush()
                self.c.log("Hook: Read answer to CONNECT request from proxy")
                resp = HTTPResponse.from_stream(self.c.server_conn.rfile, upstream_request.method)
                if resp.code != 200:
                    raise ProxyError(resp.code,
                                     "Cannot reestablish SSL connection with upstream proxy: \r\n" + str(resp.headers))
                self.c.log("Hook: Establish SSL with upstream proxy")
                self.c.establish_ssl(server=True)

            self.c.server_reconnect = reconnect_http_proxy

        raise ConnectionTypeChange

    def process_request(self, request):
        """
        Prepare the request for forwarding: authenticate in regular mode,
        answer CONNECT requests addressed directly at the proxy, and make
        sure a server connection to the request's destination exists.

        Raises http.HttpError for requests that cannot be handled,
        HttpAuthenticationError if authentication fails, and
        ConnectionTypeChange (via ssl_upgrade) after answering a CONNECT.
        """
        if self.c.mode == "regular":
            self.authenticate(request)
        if request.form_in == "authority" and self.c.client_conn.ssl_established:
            raise http.HttpError(502, "Must not CONNECT on already encrypted connection")

        # If we have a CONNECT request, we might need to intercept
        if request.form_in == "authority":
            directly_addressed_at_mitmproxy = (self.c.mode == "regular") and not self.c.config.forward_proxy
            if directly_addressed_at_mitmproxy:
                self.c.establish_server_connection((request.host, request.port))
                self.c.client_conn.wfile.write(
                    'HTTP/1.1 200 Connection established\r\n' +
                    ('Proxy-agent: %s\r\n' % self.c.server_version) +
                    '\r\n'
                )
                self.c.client_conn.wfile.flush()
                self.ssl_upgrade()  # raises ConnectionTypeChange exception

        if self.c.mode == "regular":
            if request.form_in == "authority":
                pass
            elif request.form_in == "absolute":
                if request.scheme != "http":
                    raise http.HttpError(400, "Invalid Request")
                if not self.c.config.forward_proxy:
                    request.form_out = "origin"
                    # (Re)connect if there is no server connection yet or it
                    # points at a different destination.
                    if ((not self.c.server_conn) or
                            (self.c.server_conn.address != (request.host, request.port))):
                        self.c.establish_server_connection((request.host, request.port))
            else:
                raise http.HttpError(400, "Invalid Request")

    def authenticate(self, request):
        """
        Check the request against the configured authenticator, if any.

        On success the authenticator cleans the request headers; on failure
        HttpAuthenticationError is raised with the challenge headers.
        Returns the request headers.
        """
        if self.c.config.authenticator:
            if self.c.config.authenticator.authenticate(request.headers):
                self.c.config.authenticator.clean(request.headers)
            else:
                raise HttpAuthenticationError(self.c.config.authenticator.auth_challenge_headers())
        return request.headers