mitmproxy/netlib/http/http1/read.py
Maximilian Hils 11e7f476bd wip
2015-09-15 19:12:15 +02:00

347 lines
10 KiB
Python

from __future__ import absolute_import, print_function, division
import time
import sys
import re
from ... import utils
from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException
from .. import Request, Response, Headers
# Protocol identifier for HTTP/1.1 (going by the name, the value used for
# ALPN negotiation). Not referenced by the functions visible in this file.
ALPN_PROTO_HTTP1 = 'http/1.1'
def read_request(rfile, body_size_limit=None):
    """
    Read a complete HTTP request (head and body) from an input stream.

    Args:
        rfile: The input stream
        body_size_limit: Forwarded to read_message_body as its ``limit``

    Returns:
        The request built by read_request_head, with ``body`` and
        ``timestamp_end`` filled in.
    """
    req = read_request_head(rfile)
    body = read_message_body(rfile, req, limit=body_size_limit)
    req.body = body
    # The end timestamp is taken only once the whole body has been consumed.
    req.timestamp_end = time.time()
    return req
def read_request_head(rfile):
    """
    Parse an HTTP request head (request line + headers) from an input stream.

    Args:
        rfile: The input stream

    Returns:
        The HTTP request object. None is passed in the constructor slot that
        follows the headers; read_request assigns ``body`` afterwards.

    Raises:
        HttpReadDisconnect: If no bytes can be read from rfile.
        HttpSyntaxException: If the input is invalid.
        HttpException: A different error occurred.
    """
    started = time.time()
    if hasattr(rfile, "reset_timestamps"):
        rfile.reset_timestamps()

    request_line = _read_request_line(rfile)
    form, method, scheme, host, port, path, http_version = request_line
    headers = _read_headers(rfile)

    # Prefer the stream's own first-byte timestamp when it offers one; it is
    # more accurate than the wall-clock time sampled before reading.
    started = rfile.first_byte_timestamp if hasattr(rfile, "first_byte_timestamp") else started

    return Request(
        form,
        method,
        scheme,
        host,
        port,
        path,
        http_version,
        headers,
        None,
        started
    )
def read_response(rfile, request, body_size_limit=None):
    """
    Read a complete HTTP response (head and body) from an input stream.

    Args:
        rfile: The input stream
        request: The request this response answers; it is handed to
            read_message_body together with the response
        body_size_limit: Forwarded to read_message_body as its ``limit``

    Returns:
        The response built by read_response_head, with ``body`` and
        ``timestamp_end`` filled in.
    """
    resp = read_response_head(rfile)
    body = read_message_body(rfile, request, resp, body_size_limit)
    resp.body = body
    # The end timestamp is taken only once the whole body has been consumed.
    resp.timestamp_end = time.time()
    return resp
def read_response_head(rfile):
    """
    Parse an HTTP response head (status line + headers) from an input stream.

    Args:
        rfile: The input stream

    Returns:
        The HTTP response object. None is passed in the constructor slot that
        follows the headers; read_response assigns ``body`` afterwards.
    """
    started = time.time()
    if hasattr(rfile, "reset_timestamps"):
        rfile.reset_timestamps()

    status_line = _read_response_line(rfile)
    http_version, status_code, message = status_line
    headers = _read_headers(rfile)

    # Prefer the stream's own first-byte timestamp when it offers one; it is
    # more accurate than the wall-clock time sampled before reading.
    started = rfile.first_byte_timestamp if hasattr(rfile, "first_byte_timestamp") else started

    return Response(http_version, status_code, message, headers, None, started)
def read_message_body(*args, **kwargs):
    """
    Read a whole HTTP message body and return it as a single bytes object.

    All arguments are forwarded unchanged to read_message_body_chunked; the
    pieces it yields are concatenated.
    """
    return b"".join(read_message_body_chunked(*args, **kwargs))
def read_message_body_chunked(rfile, request, response=None, limit=None, max_chunk_size=None):
    """
    Read an HTTP message body, yielding it piece by piece (generator).

    Args:
        rfile: The input stream
        request: The request. If a request body should be read, only request
            should be passed.
        response: The response. If a response body should be read, both
            request and response should be passed.
        limit: Maximum body size in bytes. None, 0 or a negative value
            means "no limit" (sys.maxsize is used).
        max_chunk_size: Largest piece read and yielded at once. Defaults to
            ``limit`` when falsy.

    Raises:
        HttpException: If the body exceeds ``limit`` or the stream ends
            before the advertised Content-Length has been read.
        HttpSyntaxException: Propagated from expected_http_body_size or
            _read_chunked on malformed input.
    """
    if not response:
        headers = request.headers
        response_code = None
        is_request = True
    else:
        headers = response.headers
        response_code = response.status_code
        is_request = False

    if not limit or limit < 0:
        limit = sys.maxsize
    if not max_chunk_size:
        max_chunk_size = limit

    expected_size = expected_http_body_size(
        headers, is_request, request.method, response_code
    )

    if expected_size is None:
        # Chunked transfer encoding: the size is discovered while reading.
        for x in _read_chunked(rfile, limit):
            yield x
    elif expected_size >= 0:
        # Size known in advance (Content-Length or an implicitly empty body).
        if expected_size > limit:
            raise HttpException(
                "HTTP Body too large. "
                "Limit is {}, content length was advertised as {}".format(limit, expected_size)
            )
        bytes_left = expected_size
        while bytes_left:
            chunk_size = min(bytes_left, max_chunk_size)
            content = rfile.read(chunk_size)
            # A short read means the peer closed the connection before
            # sending the advertised number of bytes. Fail loudly instead of
            # silently handing a truncated body to the caller.
            if len(content) < chunk_size:
                raise HttpException("Unexpected EOF")
            yield content
            bytes_left -= chunk_size
    else:
        # Size unknown: read until end of stream, but never beyond the limit.
        bytes_left = limit
        while bytes_left:
            chunk_size = min(bytes_left, max_chunk_size)
            content = rfile.read(chunk_size)
            if not content:
                return
            yield content
            bytes_left -= chunk_size
        # The limit is exhausted; any further byte means the body is too big.
        not_done = rfile.read(1)
        if not_done:
            raise HttpException("HTTP body too large. Limit is {}.".format(limit))
def connection_close(http_version, headers):
    """
    Decide whether the client connection should be closed after this
    message, following RFC 2616 Section 8.1.

    Args:
        http_version: The message's HTTP version; compared against ``(1, 1)``
        headers: The message headers

    Returns:
        True if the connection should be closed, False if it may be kept open.
    """
    # An explicit Connection header takes precedence over the version default.
    # NOTE(review): membership is tested with a bytes key while the token
    # helper receives a str name - confirm both forms are accepted.
    has_connection_header = b"connection" in headers
    if has_connection_header:
        tokens = utils.get_header_tokens(headers, "connection")
        if b"close" in tokens:
            return True
        if b"keep-alive" in tokens:
            return False

    # Without a decisive header, only HTTP 1.1 connections are assumed to be
    # persistent.
    return http_version != (1, 1)
def expected_http_body_size(
    headers,
    is_request,
    request_method,
    response_code,
):
    """
    Work out how long the body of a message is expected to be.

    The rules follow http://tools.ietf.org/html/rfc7230#section-3.3

    Returns:
        - a non-negative integer, if the size is known in advance
        - None, if the size is unknown in advance (chunked encoding)
        - -1, if all data should be read until end of stream.

    Raises:
        HttpSyntaxException, if the content length header is invalid
    """
    method = request_method.upper() if request_method else request_method

    if not is_request:
        # Responses that never carry a body, whatever their headers say.
        bodiless = (
            method == b"HEAD" or
            100 <= response_code <= 199 or
            (response_code == 200 and method == b"CONNECT") or
            response_code in (204, 304)
        )
        if bodiless:
            return 0
    elif headers.get(b"expect", b"").lower() == b"100-continue":
        # The request announces "Expect: 100-continue": no body is read here.
        return 0

    if b"chunked" in headers.get(b"transfer-encoding", b"").lower():
        return None

    if b"content-length" in headers:
        try:
            declared = int(headers[b"content-length"])
            if declared < 0:
                raise ValueError()
        except ValueError:
            raise HttpSyntaxException("Unparseable Content Length")
        return declared

    # No framing information: requests have no body, responses run to EOF.
    return 0 if is_request else -1
def _get_first_line(rfile):
line = rfile.readline()
if line == b"\r\n" or line == b"\n":
# Possible leftover from previous message
line = rfile.readline()
if not line:
raise HttpReadDisconnect()
return line
def _read_request_line(rfile):
    """
    Read and parse an HTTP request line.

    Returns:
        A tuple (form, method, scheme, host, port, path, http_version) where
        form is one of "relative", "authority" or "absolute". Parts that do
        not apply to the given form are None.

    Raises:
        HttpReadDisconnect: Propagated from _get_first_line if nothing can
            be read.
        HttpSyntaxException: If the request line is malformed or carries an
            unknown HTTP version.
    """
    line = _get_first_line(rfile)
    try:
        method, path, http_version = line.strip().split(b" ")

        if path == b"*" or path.startswith(b"/"):
            form = "relative"
            # Non-ASCII paths raise UnicodeDecodeError, which is a ValueError
            # and is therefore reported as a bad request line below.
            path.decode("ascii")
            scheme, host, port = None, None, None
        elif method == b"CONNECT":
            form = "authority"
            host, port = _parse_authority_form(path)
            scheme, path = None, None
        else:
            form = "absolute"
            scheme, host, port, path = utils.parse_url(path)
    except ValueError:
        raise HttpSyntaxException("Bad HTTP request line: {}".format(line))

    # Validate the version token exactly like _read_response_line does, so
    # that garbage such as "GET / FOO" is rejected rather than passed on.
    _check_http_version(http_version)

    return form, method, scheme, host, port, path, http_version
def _parse_authority_form(hostport):
"""
Returns (host, port) if hostport is a valid authority-form host specification.
http://tools.ietf.org/html/draft-luotonen-web-proxy-tunneling-01 section 3.1
Raises:
ValueError, if the input is malformed
"""
try:
host, port = hostport.split(b":")
port = int(port)
if not utils.is_valid_host(host) or not utils.is_valid_port(port):
raise ValueError()
except ValueError:
raise ValueError("Invalid host specification: {}".format(hostport))
return host, port
def _read_response_line(rfile):
    """
    Read and parse an HTTP status line.

    Returns:
        A tuple (http_version, status_code, message). status_code is an int;
        message is the reason phrase and is b"" when the server omitted it.

    Raises:
        HttpReadDisconnect: Propagated from _get_first_line if nothing can
            be read.
        HttpSyntaxException: If the status line is malformed or carries an
            unknown HTTP version.
    """
    line = _get_first_line(rfile)
    try:
        # Split at most twice: the reason phrase may itself contain spaces
        # ("Not Found", "Internal Server Error") and must stay in one piece.
        parts = line.strip().split(b" ", 2)
        if len(parts) == 2:  # handle missing message gracefully
            parts.append(b"")
        http_version, status_code, message = parts
        status_code = int(status_code)
        _check_http_version(http_version)
    except ValueError:
        raise HttpSyntaxException("Bad HTTP response line: {}".format(line))
    return http_version, status_code, message
def _check_http_version(http_version):
if not re.match(rb"^HTTP/\d\.\d$", http_version):
raise HttpSyntaxException("Unknown HTTP version: {}".format(http_version))
def _read_headers(rfile):
    """
    Read a block of header lines from rfile.

    Reading stops at the first blank line or at end of input. A line that
    starts with a space or tab continues the previous header's value.

    Returns:
        A headers object

    Raises:
        HttpSyntaxException: If a line has no colon, or a continuation line
            appears before any header.
    """
    fields = []
    while True:
        line = rfile.readline()
        if not line or line == b"\r\n" or line == b"\n":
            break

        is_continuation = line[0] in b" \t"
        if is_continuation:
            if not fields:
                raise HttpSyntaxException("Invalid headers")
            # continued header: fold it into the value of the previous field
            previous = fields[-1]
            previous[1] = previous[1] + b'\r\n ' + line.strip()
            continue

        try:
            name, value = line.split(b":", 1)
        except ValueError:
            raise HttpSyntaxException("Invalid headers")
        fields.append([name, value.strip()])
    return Headers(fields)
def _read_chunked(rfile, limit):
"""
Read a HTTP body with chunked transfer encoding.
Args:
rfile: the input file
limit: A positive integer
"""
total = 0
while True:
line = rfile.readline(128)
if line == b"":
raise HttpException("Connection closed prematurely")
if line != b"\r\n" and line != b"\n":
try:
length = int(line, 16)
except ValueError:
raise HttpSyntaxException("Invalid chunked encoding length: {}".format(line))
total += length
if total > limit:
raise HttpException(
"HTTP Body too large. Limit is {}, "
"chunked content longer than {}".format(limit, total)
)
chunk = rfile.read(length)
suffix = rfile.readline(5)
if suffix != b"\r\n":
raise HttpSyntaxException("Malformed chunked body")
if length == 0:
return
yield chunk