mitmproxy/pathod/language/http.py

394 lines
10 KiB
Python
Raw Normal View History

import abc
import pyparsing as pp
from mitmproxy.net import websocket_utils
from mitmproxy.net.http import status_codes, url, user_agents
from . import base, exceptions, actions, message
# TODO: use the mitmproxy.net.semantics.protocol assemble method
# instead of duplicating the HTTP on-the-wire representation here.
# See the http2 language for an example.
class WS(base.CaselessLiteral):
    """Literal token spelled ``ws``; looked up by the ``ws`` properties below."""

    TOK = "ws"
class Raw(base.CaselessLiteral):
    """Literal token spelled ``r``; its presence makes a message ``raw``."""

    TOK = "r"
2015-05-03 00:54:25 +00:00
class Path(base.Value):
    """Value token holding the request path (no preamble of its own)."""
class StatusCode(base.Integer):
    """Integer token holding the response status code (no preamble)."""
2015-05-03 00:54:25 +00:00
class Reason(base.Value):
    """Value token for the response reason phrase, tagged with ``m``."""

    preamble = "m"
2015-05-03 00:54:25 +00:00
class Body(base.Value):
    """Value token for the message body, tagged with ``b``."""

    preamble = "b"
class Times(base.Integer):
    """Integer token tagged with ``x``; exposed through ``Request.times``."""

    # NOTE(review): presumably a repeat count for the request - confirm
    # against the code that consumes ``Request.times``.
    preamble = "x"
class Method(base.OptionsOrValue):
    """Request method token.

    ``options`` lists the well-known HTTP verbs; going by the base class
    name, anything else is presumably accepted as a free-form value.
    """

    options = [
        "GET", "HEAD", "POST", "PUT",
        "DELETE", "OPTIONS", "TRACE", "CONNECT",
    ]
2016-10-17 04:29:45 +00:00
class _HeaderMixin:
    """Shared rendering logic for tokens that emit a single header line.

    Classes using this mixin must provide ``key`` and ``value`` attributes
    that each expose ``get_generator(settings)``.
    """

    @property
    def unique_name(self):
        """Always ``None``."""
        return None

    def format_header(self, key, value):
        """Return the parts of one ``key: value`` line terminated by CRLF."""
        return [key, b": ", value, b"\r\n"]

    def values(self, settings):
        """Generate key and value with *settings* and format them as a header."""
        header_key = self.key.get_generator(settings)
        header_value = self.value.get_generator(settings)
        return self.format_header(header_key, header_value)
class Header(_HeaderMixin, base.KeyValue):
    """Arbitrary key/value header token, tagged with ``h``."""

    preamble = "h"
2015-05-03 00:54:25 +00:00
class ShortcutContentType(_HeaderMixin, base.Value):
    """``c`` shortcut: a value rendered under a fixed ``Content-Type`` key."""

    key = base.TokValueLiteral("Content-Type")
    preamble = "c"
2015-05-02 10:32:57 +00:00
2015-05-03 00:54:25 +00:00
class ShortcutLocation(_HeaderMixin, base.Value):
    """``l`` shortcut: a value rendered under a fixed ``Location`` key."""

    key = base.TokValueLiteral("Location")
    preamble = "l"
2015-05-02 10:32:57 +00:00
class ShortcutUserAgent(_HeaderMixin, base.OptionsOrValue):
    """``u`` shortcut: a ``User-Agent`` header given by shortcut or by value.

    The accepted options are the second field of every entry in
    ``user_agents.UASTRINGS``.
    """

    preamble = "u"
    options = [entry[1] for entry in user_agents.UASTRINGS]
    key = base.TokValueLiteral("User-Agent")

    def values(self, settings):
        """Render the header, expanding a shortcut to its full agent string.

        When ``option_used`` is set, the value is lower-cased and decoded,
        looked up with ``user_agents.get_by_shortcut``, and the third field
        of the result is encoded and used instead of the raw value.
        """
        agent = self.value.val
        if self.option_used:
            shortcut = agent.lower().decode()
            agent = user_agents.get_by_shortcut(shortcut)[2].encode()
        header_key = self.key.get_generator(settings)
        return self.format_header(header_key, agent)
def get_header(val, headers):
    """Return the first header whose key equals *val*, ignoring case.

    Header keys may be Values, so each key is "generated" (with empty
    settings) before it is compared.  Returns ``None`` when nothing matches.
    """
    for header in headers:
        name = header.key.get_generator({})
        # Cheap length check first; only then materialise and compare.
        if len(name) != len(val):
            continue
        if name[:].lower() == val.lower():
            return header
    return None
class _HTTPMessage(message.Message):
    """Behaviour shared by `Request` and `Response`.

    Subclasses supply `preamble`; `values` then assembles the first line,
    the header lines and the optional body into one list of parts.
    """

    version = b"HTTP/1.1"

    @property
    def actions(self):
        """All tokens that are ``actions._Action`` instances."""
        return self.toks(actions._Action)

    @property
    def raw(self):
        """``True`` when a `Raw` token is present."""
        return bool(self.tok(Raw))

    @property
    def body(self):
        """The `Body` token as returned by ``self.tok``."""
        return self.tok(Body)

    @abc.abstractmethod
    def preamble(self, settings):  # pragma: no cover
        """Return the list of parts making up the first line of the message."""
        pass

    @property
    def headers(self):
        """All tokens that render a header (``_HeaderMixin`` instances)."""
        return self.toks(_HeaderMixin)

    def values(self, settings):
        """Return the parts of the full message in wire order.

        Layout: preamble, CRLF, every header's parts, CRLF, then the body's
        parts when a body token is present.
        """
        parts = self.preamble(settings)
        parts.append(b"\r\n")
        for header in self.headers:
            parts.extend(header.values(settings))
        parts.append(b"\r\n")
        body = self.body
        if body:
            parts.extend(body.values(settings))
        return parts
class Response(_HTTPMessage):
    """An HTTP response specification.

    Grammar (see `expr`): either ``ws`` with an optional status code, or a
    bare status code, followed by any number of the components in ``comps``.
    """

    unique_name = None  # type: ignore
    comps = (
        Header,
        ShortcutContentType,
        ShortcutLocation,
        Raw,
        Reason,
        Body,

        actions.PauseAt,
        actions.DisconnectAt,
        actions.InjectAt,
    )
    logattrs = ["status_code", "reason", "version", "body"]

    @property
    def ws(self):
        """The `WS` token as returned by ``self.tok``."""
        return self.tok(WS)

    @property
    def status_code(self):
        """The `StatusCode` token as returned by ``self.tok``."""
        return self.tok(StatusCode)

    @property
    def reason(self):
        """The `Reason` token as returned by ``self.tok``."""
        return self.tok(Reason)

    def preamble(self, settings):
        """Return the parts of the status line (without the trailing CRLF).

        When no reason token is given, the phrase is looked up in
        ``status_codes.RESPONSES``, falling back to ``"Unknown code"``.
        """
        parts = [self.version, b" "]
        parts.extend(self.status_code.values(settings))
        code = int(self.status_code.value)
        parts.append(b" ")
        if self.reason:
            parts.extend(self.reason.values(settings))
        else:
            phrase = status_codes.RESPONSES.get(code, "Unknown code")
            parts.append(phrase.encode())
        return parts

    def resolve(self, settings, msg=None):
        """Return a new response with implied tokens added and all resolved.

        * With ``ws``: requires ``settings.websocket_key``, inserts status
          101 when no status code was given, and appends every server
          handshake header that is not already present.
        * Unless ``raw``: appends a ``Content-Length`` header when none is
          present (``0`` when there is no body).

        Raises:
            exceptions.RenderError: ``ws`` was requested but
                ``settings.websocket_key`` is not set.
        """
        literal = base.TokValueLiteral
        tokens = self.tokens[:]

        if self.ws:
            if not settings.websocket_key:
                raise exceptions.RenderError(
                    "No websocket key - have we seen a client handshake?"
                )
            if not self.status_code:
                tokens.insert(1, StatusCode(101))
            handshake = websocket_utils.server_handshake_headers(
                settings.websocket_key
            )
            for field in handshake.fields:
                # Presence is checked against the original headers only.
                if not get_header(field[0], self.headers):
                    tokens.append(
                        Header(
                            literal(field[0].decode()),
                            literal(field[1].decode()),
                        )
                    )

        if not self.raw and not get_header(b"Content-Length", self.headers):
            if self.body:
                length = sum(
                    len(chunk) for chunk in self.body.values(settings)
                )
            else:
                length = 0
            tokens.append(
                Header(literal("Content-Length"), literal(str(length)))
            )

        intermediate = self.__class__(tokens)
        resolved = [tok.resolve(settings, intermediate) for tok in tokens]
        return self.__class__(resolved)

    @classmethod
    def expr(cls):
        """Build the pyparsing expression for a response specification."""
        atom = pp.MatchFirst([comp.expr() for comp in cls.comps])
        leader = pp.MatchFirst(
            [
                WS.expr() + pp.Optional(base.Sep + StatusCode.expr()),
                StatusCode.expr(),
            ]
        )
        resp = pp.And([leader, pp.ZeroOrMore(base.Sep + atom)])
        return resp.setParseAction(cls)

    def spec(self):
        """Return the colon-joined specs of all tokens."""
        return ":".join(tok.spec() for tok in self.tokens)
2017-05-26 14:22:13 +00:00
class NestedResponse(message.NestedMessage):
    """Nested message tagged with ``s`` whose nested type is `Response`."""

    nest_type = Response
    preamble = "s"
class Request(_HTTPMessage):
    """An HTTP request specification.

    Grammar (see `expr`): either ``ws`` with an optional method, or a bare
    method, then a separator and the path, followed by any number of the
    components in ``comps``.
    """

    comps = (
        Header,
        ShortcutContentType,
        ShortcutUserAgent,
        Raw,
        NestedResponse,
        Body,
        Times,

        actions.PauseAt,
        actions.DisconnectAt,
        actions.InjectAt,
    )
    logattrs = ["method", "path", "body"]

    @property
    def ws(self):
        """The `WS` token as returned by ``self.tok``."""
        return self.tok(WS)

    @property
    def method(self):
        """The `Method` token as returned by ``self.tok``."""
        return self.tok(Method)

    @property
    def path(self):
        """The `Path` token as returned by ``self.tok``."""
        return self.tok(Path)

    @property
    def times(self):
        """The `Times` token as returned by ``self.tok``."""
        return self.tok(Times)

    @property
    def nested_response(self):
        """The `NestedResponse` token as returned by ``self.tok``."""
        return self.tok(NestedResponse)

    def preamble(self, settings):
        """Return the parts of the request line (without the trailing CRLF).

        Layout: method, space, path, the nested response spec (if any,
        appended directly after the path), space, HTTP version.
        """
        line = self.method.values(settings)
        line.append(b" ")
        line.extend(self.path.values(settings))
        nested = self.nested_response
        if nested:
            nested_spec = nested.parsed.spec()
            # Every other part of the request line is bytes, whereas
            # Response.spec() builds a str with ":".join.  Encode it so the
            # list stays homogeneous; a spec that is already bytes-like is
            # passed through untouched.
            if isinstance(nested_spec, str):
                nested_spec = nested_spec.encode()
            line.append(nested_spec)
        line.append(b" ")
        line.append(self.version)
        return line

    def resolve(self, settings, msg=None):
        """Return a new request with implied tokens added and all resolved.

        * With ``ws``: inserts a ``get`` method when none was given and
          appends every client handshake header not already present.
        * Unless ``raw``: appends ``Content-Length`` when there is a body
          and no such header yet, and appends ``Host`` when
          ``settings.request_host`` is set and no such header exists.  The
          host is taken from the path when ``url.parse`` accepts it,
          otherwise from ``settings.request_host``.
        """
        literal = base.TokValueLiteral
        tokens = self.tokens[:]

        if self.ws:
            if not self.method:
                tokens.insert(1, Method("get"))
            for field in websocket_utils.client_handshake_headers().fields:
                # Presence is checked against the original headers only.
                if not get_header(field[0], self.headers):
                    tokens.append(
                        Header(
                            literal(field[0].decode()),
                            literal(field[1].decode()),
                        )
                    )

        if not self.raw:
            if not get_header(b"Content-Length", self.headers) and self.body:
                length = sum(
                    len(chunk) for chunk in self.body.values(settings)
                )
                tokens.append(
                    Header(literal("Content-Length"), literal(str(length)))
                )

            if settings.request_host and not get_header(b"Host", self.headers):
                host = settings.request_host
                if self.path:
                    raw_path = b"".join(self.path.values({})).decode(
                        "ascii", errors="ignore"
                    )
                    try:
                        _, host, _, _ = url.parse(raw_path)
                        host = host.decode("ascii", errors="ignore")
                    except ValueError:
                        # Deliberate best effort: the path is not a URL that
                        # url.parse accepts, so keep settings.request_host.
                        pass
                tokens.append(Header(literal("Host"), literal(host)))

        intermediate = self.__class__(tokens)
        resolved = [tok.resolve(settings, intermediate) for tok in tokens]
        return self.__class__(resolved)

    @classmethod
    def expr(cls):
        """Build the pyparsing expression for a request specification."""
        atom = pp.MatchFirst([comp.expr() for comp in cls.comps])
        leader = pp.MatchFirst(
            [
                WS.expr() + pp.Optional(base.Sep + Method.expr()),
                Method.expr(),
            ]
        )
        resp = pp.And(
            [
                leader,
                base.Sep,
                Path.expr(),
                pp.ZeroOrMore(base.Sep + atom),
            ]
        )
        return resp.setParseAction(cls)

    def spec(self):
        """Return the colon-joined specs of all tokens."""
        return ":".join(tok.spec() for tok in self.tokens)
def make_error_response(reason, body=None):
    """Build a plain-text `Response` with status 800 describing an error.

    Args:
        reason: Text used as the reason phrase, and as the body text when
            *body* is falsy.
        body: Optional body text; it is prefixed with ``"pathod error: "``.

    Returns:
        A `Response` made of status code, Content-Type header, reason and
        body tokens.
    """
    literal = base.TokValueLiteral
    content_type = Header(literal("Content-Type"), literal("text/plain"))
    return Response(
        [
            StatusCode("800"),
            content_type,
            Reason(literal(reason)),
            Body(literal("pathod error: " + (body or reason))),
        ]
    )