mitmproxy/libpathod/rparse.py

800 lines
20 KiB
Python
Raw Normal View History

import operator, string, random, mmap, os, time
from email.utils import formatdate
2012-04-28 00:42:03 +00:00
import contrib.pyparsing as pp
2012-07-08 23:09:37 +00:00
from netlib import http_status, tcp
2012-06-24 05:47:55 +00:00
import utils
2012-04-28 00:42:03 +00:00
# Default number of items handed to a single fp.write() call by write_values.
BLOCKSIZE = 1024
# Logged message attributes that have a length are cut to this many items.
TRUNCATE = 1024
class FileAccessDenied(Exception):
    """
        Raised when a file referenced by a spec may not be read.
    """
2012-04-28 02:43:57 +00:00
class ParseException(Exception):
    """
        A spec parse failure.

        msg: Description of the failure.
        s: The text that failed to parse.
        col: Column of the failure within s.
    """
    def __init__(self, msg, s, col):
        Exception.__init__(self)
        self.msg, self.s, self.col = msg, s, col

    def marked(self):
        """
            Return the text followed by a second line holding a caret that
            is indented by col-1 spaces.
        """
        pointer = " " * (self.col - 1) + "^"
        return "%s\n%s" % (self.s, pointer)

    def __str__(self):
        return "%s at offset %s of %r" % (self.msg, self.col, self.s)
2012-04-28 02:43:57 +00:00
def actions_log(lst):
    """
        Return a loggable version of an action list.

        "inject" actions are replaced by a list whose third item is the
        repr() of the injected value; every other action is passed through
        as the same object.
    """
    def loggable(action):
        if action[1] != "inject":
            return action
        return [action[0], action[1], repr(action[2])]

    return [loggable(action) for action in lst]
2012-06-24 05:23:37 +00:00
def ready_actions(length, lst):
    """
        Resolve symbolic offsets and return the actions as sorted tuples.

        length: Length of the message the actions apply to.
        lst: Sequence of actions whose first item is the offset.

        An offset of "r" becomes random.randrange(length), and an offset of
        "a" becomes length+1. Other offsets are kept as they are.
    """
    resolved = []
    for action in lst:
        where = action[0]
        if where == "r":
            where = random.randrange(length)
        elif where == "a":
            where = length + 1
        resolved.append((where,) + tuple(action[1:]))
    return sorted(resolved)
def send_chunk(fp, val, blocksize, start, end):
    """
        Write val[start:end] to fp using slices of at most blocksize items.

        (start, end): Inclusive lower bound, exclusive upper bound.

        Returns end-start.
    """
    for lo in range(start, end, blocksize):
        hi = min(lo + blocksize, end)
        fp.write(val[lo:hi])
    return end - start
def write_values(fp, vals, actions, sofar=0, skip=0, blocksize=BLOCKSIZE):
    """
        Write a list of values to fp, applying actions at their offsets.

        fp: Object with a write() method.

        vals: A list of values, which may be strings or Value objects.

        actions: A list of (offset, action, arg) tuples. Action may be
        "pause", "disconnect" or "inject". Offsets are counted from the
        start of the whole output, across all values.

        sofar, skip: Kept for interface compatibility. Neither influences
        the output; the running position always starts at 0.

        blocksize: Maximum number of items passed to one fp.write() call.

        Both vals and actions are in reverse order, with the first items
        last. Both lists are consumed.

        Return True if connection should disconnect.
    """
    # NOTE(review): the PauseAt grammar accepts "f" in place of a number of
    # seconds. It is treated here as a pause of one year, which presumably
    # matches the intent ("forever") - confirm. Passing "f" straight to
    # time.sleep() would raise TypeError.
    forever = 60 * 60 * 24 * 365

    def apply(action):
        # Carry out one action. True means the connection must be dropped.
        kind = action[1]
        if kind == "pause":
            time.sleep(forever if action[2] == "f" else action[2])
        elif kind == "disconnect":
            return True
        elif kind == "inject":
            send_chunk(fp, action[2], blocksize, 0, len(action[2]))
        return False

    sofar = 0
    try:
        while vals:
            v = vals.pop()
            offset = 0
            while actions and actions[-1][0] < (sofar + len(v)):
                a = actions.pop()
                # Position of the action inside v. It is an absolute index
                # into v, not a distance from the previous action, and it is
                # clamped so that we never step backwards and re-send data.
                end = max(a[0] - sofar, offset)
                send_chunk(fp, v, blocksize, offset, end)
                offset = end
                if apply(a):
                    return True
            send_chunk(fp, v, blocksize, offset, len(v))
            sofar += len(v)
        # Actions located at or beyond the end of the data (for instance
        # the "a" offset, which resolves to length+1) are applied once
        # everything has been written.
        while actions:
            if apply(actions.pop()):
                return True
    except tcp.NetLibDisconnect: # pragma: no cover
        return True
2012-04-28 00:42:03 +00:00
# Maps a datatype name to the string of characters that RandomGenerator
# draws from. The names are also used as literals by the "@" generator
# syntax in ValueGenerate.expr.
DATATYPES = dict(
    ascii_letters = string.ascii_letters,
    ascii_lowercase = string.ascii_lowercase,
    ascii_uppercase = string.ascii_uppercase,
    digits = string.digits,
    hexdigits = string.hexdigits,
    octdigits = string.octdigits,
    punctuation = string.punctuation,
    whitespace = string.whitespace,
    ascii = string.printable,
    # One character for every value from 0 to 255.
    bytes = "".join(chr(i) for i in range(256))
)
# An optionally signed run of digits, converted to an int.
v_integer = pp.Regex(r"[+-]?\d+")
v_integer = v_integer.setName("integer")
v_integer = v_integer.setParseAction(lambda toks: int(toks[0]))


# A string enclosed in double or single quotes. Backslash is the escape
# character and the string may span lines.
v_literal = pp.MatchFirst(
    [
        pp.QuotedString(
            "\"",
            escChar="\\",
            unquoteResults=True,
            multiline=True
        ),
        pp.QuotedString(
            "'",
            escChar="\\",
            unquoteResults=True,
            multiline=True
        ),
    ]
)


# Either a quoted literal, or an unquoted word made of printable characters
# other than comma, colon and newline.
v_naked_literal = pp.MatchFirst(
    [
        v_literal,
        pp.Word("".join(c for c in pp.printables if c not in ",:\n")),
    ]
)
class LiteralGenerator:
    """
        Wraps a fixed string behind the generator interface (len, indexing
        and slicing).
    """
    def __init__(self, s):
        self.s = s

    def __eq__(self, other):
        return self[:] == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==. Without this, != would
        # compare identities and disagree with __eq__.
        return not self.__eq__(other)

    def __len__(self):
        return len(self.s)

    def __getitem__(self, x):
        return self.s[x]

    def __getslice__(self, a, b):
        return self.s[a:b]

    def __repr__(self):
        return '"%s"'%self.s
2012-04-28 00:42:03 +00:00
class RandomGenerator:
    """
        Generator of random characters.

        dtype: A key of DATATYPES, selecting the characters to draw from.
        length: The length reported by len().
    """
    def __init__(self, dtype, length):
        self.dtype, self.length = dtype, length

    def __len__(self):
        return self.length

    def __getitem__(self, x):
        # The index is not used: every lookup draws a fresh character.
        return random.choice(DATATYPES[self.dtype])

    def __getslice__(self, a, b):
        # The upper bound is capped at the configured length.
        stop = min(b, self.length)
        alphabet = DATATYPES[self.dtype]
        return "".join(random.choice(alphabet) for _ in range(a, stop))

    def __repr__(self):
        return "%s random from %s" % (self.length, self.dtype)
2012-04-28 00:42:03 +00:00
class FileGenerator:
    """
        Serves the contents of a file through a read-only memory map.

        path: Path of the file to map.
    """
    def __init__(self, path):
        self.path = path
        # Binary mode, since the data is served as-is. open() replaces the
        # file() builtin, and access=ACCESS_READ is used instead of
        # prot=PROT_READ because the prot argument only exists on Unix.
        self.fp = open(path, "rb")
        self.map = mmap.mmap(self.fp.fileno(), 0, access=mmap.ACCESS_READ)

    def __len__(self):
        return len(self.map)

    def __getitem__(self, x):
        return self.map[x]

    def __getslice__(self, a, b):
        return self.map[a:b]

    def __repr__(self):
        return "<%s"%self.path
2012-04-28 00:42:03 +00:00
class _Value:
    """
        Base class for literal values. The text is stored with its escape
        sequences decoded using the "string_escape" codec.
    """
    def __init__(self, val):
        self.val = val.decode("string_escape")

    def get_generator(self, settings):
        # settings is not needed for a literal.
        return LiteralGenerator(self.val)

    def __repr__(self):
        return self.val
class ValueLiteral(_Value):
    """
        A quoted literal value.
    """
    @classmethod
    def expr(klass):
        # Work on a copy so the shared v_literal keeps no parse action.
        quoted = v_literal.copy()
        return quoted.setParseAction(lambda x: klass(*x))
class ValueNakedLiteral(_Value):
    """
        A literal value that may be written without quotes.
    """
    @classmethod
    def expr(klass):
        # Work on a copy so the shared v_naked_literal keeps no parse action.
        naked = v_naked_literal.copy()
        return naked.setParseAction(lambda x: klass(*x))
2012-04-28 00:42:03 +00:00
class ValueGenerate:
    """
        A generated value, written as @SIZE[UNIT][,DATATYPE].

        usize: Size expressed in units.
        unit: A key of utils.SIZE_UNITS. Defaults to "b" when empty.
        datatype: A key of DATATYPES.
    """
    def __init__(self, usize, unit, datatype):
        if not unit:
            unit = "b"
        self.usize, self.unit, self.datatype = usize, unit, datatype

    def bytes(self):
        """
            Return usize multiplied by the factor for the unit.
        """
        return self.usize * utils.SIZE_UNITS[self.unit]

    def get_generator(self, settings):
        return RandomGenerator(self.datatype, self.bytes())

    @classmethod
    def expr(klass):
        def first_of(names):
            # MatchFirst takes the first alternative that matches, so a name
            # that is a prefix of another ("ascii" / "ascii_letters") must
            # come after the longer one. Sorting longest-first makes the
            # grammar independent of dictionary ordering.
            ordered = sorted(names, key=len, reverse=True)
            return reduce(operator.or_, [pp.Literal(i) for i in ordered])

        e = pp.Literal("@").suppress() + v_integer

        u = first_of(utils.SIZE_UNITS.keys())
        e = e + pp.Optional(u, default=None)

        s = pp.Literal(",").suppress()
        s += first_of(DATATYPES.keys())
        e += pp.Optional(s, default="bytes")
        return e.setParseAction(lambda x: klass(*x))

    def __str__(self):
        return "@%s%s,%s"%(self.usize, self.unit, self.datatype)
2012-04-28 00:42:03 +00:00
class ValueFile:
    """
        A value read from a file, written as <PATH.

        path: Path of the file, interpreted relative to the "staticdir"
        setting.
    """
    def __init__(self, path):
        self.path = path

    @classmethod
    def expr(klass):
        e = pp.Literal("<").suppress()
        e = e + v_naked_literal
        return e.setParseAction(lambda x: klass(*x))

    def get_generator(self, settings):
        """
            Return a FileGenerator for the file.

            settings: Mapping that is read for "staticdir" and
            "unconstrained_file_access".

            Raises FileAccessDenied if no staticdir is configured, if the
            path resolves outside staticdir while unconstrained access is
            off, or if the path is not a regular file.
        """
        uf = settings.get("unconstrained_file_access")
        sd = settings.get("staticdir")
        if not sd:
            raise FileAccessDenied("File access disabled.")
        sd = os.path.normpath(os.path.abspath(sd))

        s = os.path.expanduser(self.path)
        s = os.path.normpath(os.path.abspath(os.path.join(sd, s)))
        # Compare against the directory plus a trailing separator. A bare
        # startswith(sd) would also accept sibling paths that merely share
        # the prefix, such as "/srv/static-secret" for "/srv/static".
        root = sd if sd.endswith(os.sep) else sd + os.sep
        if not uf and s != sd and not s.startswith(root):
            raise FileAccessDenied("File access outside of configured directory")
        if not os.path.isfile(s):
            raise FileAccessDenied("File not readable")
        return FileGenerator(s)

    def __str__(self):
        return "<%s"%(self.path)
# A value: generated, read from a file, or a quoted literal.
Value = pp.MatchFirst(
    list(
        k.expr() for k in (ValueGenerate, ValueFile, ValueLiteral)
    )
)


# Like Value, but an unquoted literal is accepted as a last resort.
NakedValue = pp.MatchFirst(
    list(
        k.expr() for k in (
            ValueGenerate, ValueFile, ValueLiteral, ValueNakedLiteral
        )
    )
)


# An action offset: an integer, or one of the literals "r" and "a".
Offset = pp.MatchFirst(
    [v_integer, pp.Literal("r"), pp.Literal("a")]
)
class ShortcutContentType:
    """
        The "c" shortcut: appends a Content-Type header with the given value.
    """
    def __init__(self, value):
        self.value = value

    def accept(self, settings, r):
        name = LiteralGenerator("Content-Type")
        content = self.value.get_generator(settings)
        r.headers.append((name, content))

    @classmethod
    def expr(klass):
        spec = pp.Literal("c").suppress() + Value
        return spec.setParseAction(lambda x: klass(*x))
class ShortcutLocation:
    """
        The "l" shortcut: appends a Location header with the given value.
    """
    def __init__(self, value):
        self.value = value

    def accept(self, settings, r):
        name = LiteralGenerator("Location")
        target = self.value.get_generator(settings)
        r.headers.append((name, target))

    @classmethod
    def expr(klass):
        spec = pp.Literal("l").suppress() + Value
        return spec.setParseAction(lambda x: klass(*x))
2012-04-28 00:42:03 +00:00
class Body:
    """
        The "b" component: sets the message body from the given value.
    """
    def __init__(self, value):
        self.value = value

    def accept(self, settings, r):
        generator = self.value.get_generator(settings)
        r.body = generator

    @classmethod
    def expr(klass):
        spec = pp.Literal("b").suppress() + Value
        return spec.setParseAction(lambda x: klass(*x))
class Raw:
    """
        The "r" component: marks the message as raw.
    """
    def accept(self, settings, r):
        r.raw = True

    @classmethod
    def expr(klass):
        flag = pp.Literal("r").suppress()
        return flag.setParseAction(lambda x: klass(*x))
class Path:
    """
        The request path. A plain string is wrapped in a ValueLiteral; any
        other object is stored as given.
    """
    def __init__(self, value):
        self.value = ValueLiteral(value) if isinstance(value, basestring) else value

    def accept(self, settings, r):
        generator = self.value.get_generator(settings)
        r.path = generator

    @classmethod
    def expr(klass):
        spec = NakedValue.copy()
        return spec.setParseAction(lambda x: klass(*x))
2012-06-24 05:23:37 +00:00
class Method:
    """
        The request method: one of the known method names, matched without
        regard to case, or an arbitrary Value.
    """
    methods = [
        "get",
        "head",
        "post",
        "put",
        "delete",
        "options",
        "trace",
        "connect",
    ]

    def __init__(self, value):
        # A plain string means one of the known methods was matched, so it
        # is upper-cased to its canonical form. A different case can be
        # requested by writing the method as a string value literal.
        if isinstance(value, basestring):
            self.value = ValueLiteral(value.upper())
        else:
            self.value = value

    def accept(self, settings, r):
        generator = self.value.get_generator(settings)
        r.method = generator

    @classmethod
    def expr(klass):
        known = pp.MatchFirst([pp.CaselessLiteral(name) for name in klass.methods])
        spec = known | Value.copy()
        return spec.setParseAction(lambda x: klass(*x))
class PauseAt:
    """
        The "p" component, written as pSECONDS,OFFSET.

        seconds: An integer, or the literal "f".
        offset: An Offset (integer, "r" or "a").
    """
    def __init__(self, seconds, offset):
        self.seconds = seconds
        self.offset = offset

    @classmethod
    def expr(klass):
        duration = pp.MatchFirst([v_integer, pp.Literal("f")])
        spec = pp.And(
            [
                pp.Literal("p").suppress(),
                duration,
                pp.Literal(",").suppress(),
                Offset,
            ]
        )
        return spec.setParseAction(lambda x: klass(*x))

    def accept(self, settings, r):
        action = (self.offset, "pause", self.seconds)
        r.actions.append(action)
2012-04-28 00:42:03 +00:00
class DisconnectAt:
    """
        The "d" component, written as dOFFSET.

        value: The offset at which to disconnect: an integer or "r".
    """
    def __init__(self, value):
        self.value = value

    def accept(self, settings, r):
        r.actions.append((self.value, "disconnect"))

    @classmethod
    def expr(klass):
        e = pp.Literal("d").suppress()
        # Plain concatenation. The previous "e += e + ..." put the "d"
        # marker into the sequence twice, so the grammar demanded "dd".
        e = e + pp.MatchFirst(
            [
                v_integer,
                pp.Literal("r")
            ]
        )
        return e.setParseAction(lambda x: klass(*x))
2012-04-28 00:42:03 +00:00
2012-07-20 11:36:39 +00:00
class InjectAt:
    """
        The "i" component, written as iOFFSET,VALUE.

        offset: An Offset (integer, "r" or "a").
        value: The Value to inject.
    """
    def __init__(self, offset, value):
        self.offset = offset
        self.value = value

    @classmethod
    def expr(klass):
        spec = pp.And(
            [
                pp.Literal("i").suppress(),
                Offset,
                pp.Literal(",").suppress(),
                Value,
            ]
        )
        return spec.setParseAction(lambda x: klass(*x))

    def accept(self, settings, r):
        payload = self.value.get_generator(settings)
        r.actions.append((self.offset, "inject", payload))
2012-07-20 11:36:39 +00:00
2012-04-28 00:42:03 +00:00
class Header:
    """
        The "h" component, written as hKEY=VALUE, where both sides are
        Values.
    """
    def __init__(self, key, value):
        self.key = key
        self.value = value

    def accept(self, settings, r):
        name = self.key.get_generator(settings)
        content = self.value.get_generator(settings)
        r.headers.append((name, content))

    @classmethod
    def expr(klass):
        spec = pp.And(
            [
                pp.Literal("h").suppress(),
                Value,
                pp.Literal("=").suppress(),
                Value,
            ]
        )
        return spec.setParseAction(lambda x: klass(*x))
class Code:
    """
        The response status code with an optional message.

        code: Integer status code.
        msg: Value for the reason message. When omitted, the text is looked
        up in http_status.RESPONSES, falling back to "Unknown code".
    """
    def __init__(self, code, msg=None):
        self.code = code
        if msg is None:
            msg = ValueLiteral(
                http_status.RESPONSES.get(self.code, "Unknown code")
            )
        self.msg = msg

    def accept(self, settings, r):
        r.code = self.code
        r.msg = self.msg.get_generator(settings)

    @classmethod
    def expr(klass):
        spec = pp.And([v_integer, pp.Optional(Value)])
        return spec.setParseAction(lambda x: klass(*x))
2012-06-24 07:12:52 +00:00
class Message:
    """
        Base class for requests and responses. Subclasses supply preamble()
        and the logattrs list that serve() relies on.
    """
    version = "HTTP/1.1"

    def __init__(self):
        self.body = LiteralGenerator("")
        self.headers = []
        self.actions = []
        self.raw = False

    def length(self):
        """
            Calculate the length of the base message without any applied
            actions.
        """
        total = sum(len(part) for part in self.preamble())
        # Line break that ends the preamble.
        total += 2
        for header in self.headers:
            # Name and value, plus ": " and the line break.
            total += len(header[0]) + len(header[1]) + 4
        # Empty line that ends the headers.
        total += 2
        return total + len(self.body)

    def preview_safe(self):
        """
            Modify this message to be safe for previews, by dropping all
            pause actions.
        """
        kept = []
        for action in self.actions:
            if action[1] != "pause":
                kept.append(action)
        self.actions = kept

    def effective_length(self, actions):
        """
            Calculate the length of the base message with all applied
            actions.
        """
        # Order matters here, and must match the order of application in
        # write_values.
        total = self.length()
        for action in reversed(actions):
            kind = action[1]
            if kind == "disconnect":
                return action[0]
            if kind == "inject":
                total += len(action[2])
        return total

    def serve(self, fp, check, request_host):
        """
            Write the message to fp and return a dictionary describing what
            was done.

            fp: The file pointer to write to.

            check: A function called with the effective actions (after random
            values have been calculated). If it returns False service
            proceeds, otherwise the return is treated as an error message to
            be sent to the client, and service stops.

            request_host: If this a request, this is the connecting host. If
            None, we assume it's a response. Used to decide what standard
            modifications to make if raw is not set.

            Calling this function may modify the object.
        """
        def add_header(name, value):
            self.headers.append(
                (LiteralGenerator(name), LiteralGenerator(value))
            )

        started = time.time()
        if not self.raw:
            # Standard headers are only added when the spec did not set them.
            if self.body and not utils.get_header("Content-Length", self.headers):
                add_header("Content-Length", str(len(self.body)))
            if request_host:
                if not utils.get_header("Host", self.headers):
                    add_header("Host", request_host)
            elif not utils.get_header("Date", self.headers):
                add_header(
                    "Date",
                    formatdate(timeval=None, localtime=False, usegmt=True)
                )

        vals = self.preamble()
        vals.append("\r\n")
        for k, v in self.headers:
            vals.extend([k, ": ", v, "\r\n"])
        vals.append("\r\n")
        if self.body:
            vals.append(self.body)
        # write_values expects both lists with the first item last.
        vals.reverse()
        actions = ready_actions(self.length(), self.actions)
        actions.reverse()

        if check:
            problem = check(self, actions)
            if problem:
                PathodErrorResponse(problem).serve(fp)
                return dict(
                    disconnect = True,
                    actions = actions_log(actions),
                    error = problem
                )

        # A copy is passed so that the actions are still available for the
        # log afterwards.
        disconnect = write_values(fp, vals, actions[:])
        duration = time.time() - started
        ret = dict(
            disconnect = disconnect,
            started = started,
            duration = duration,
            actions = actions_log(actions),
        )
        for name in self.logattrs:
            attr = getattr(self, name)
            # Careful not to log any VALUE specs without sanitizing them
            # first. We truncate at 1k.
            if hasattr(attr, "__len__"):
                attr = attr[:TRUNCATE]
            ret[name] = attr
        return ret
# Optional ":" between the components of a spec; it yields no token.
Sep = pp.Optional(pp.Literal(":")).suppress()
2012-06-24 07:12:52 +00:00
class Response(Message):
    """
        An HTTP response. Defaults to code 200 with the matching message
        from http_status.RESPONSES.
    """
    # Components that may follow the status code in a response spec.
    comps = (
        Body,
        Header,
        PauseAt,
        DisconnectAt,
        InjectAt,
        ShortcutContentType,
        ShortcutLocation,
        Raw
    )
    # Attributes copied into the dictionary returned by serve().
    logattrs = ["code", "version"]

    def __init__(self):
        Message.__init__(self)
        self.code = 200
        self.msg = LiteralGenerator(http_status.RESPONSES[self.code])

    def preamble(self):
        """
            Return the parts of the status line, without the line break.
        """
        return [self.version, " ", str(self.code), " ", self.msg]

    @classmethod
    def expr(klass):
        """
            Return the grammar: a Code followed by any number of components,
            each optionally preceded by the separator.
        """
        atom = pp.MatchFirst([comp.expr() for comp in klass.comps])
        return pp.And(
            [
                Code.expr(),
                pp.ZeroOrMore(Sep + atom)
            ]
        )

    def __str__(self):
        return "%s %s" % (self.code, self.msg[:])
2012-06-24 05:47:55 +00:00
2012-06-24 07:12:52 +00:00
class Request(Message):
    """
        An HTTP request. Method and path start out as None and are set by
        the parsed components.
    """
    # Components that may follow the method and path in a request spec.
    comps = (
        Body,
        Header,
        PauseAt,
        DisconnectAt,
        InjectAt,
        ShortcutContentType,
        Raw
    )
    # Attributes copied into the dictionary returned by serve().
    logattrs = ["method", "path"]

    def __init__(self):
        Message.__init__(self)
        self.method = None
        self.path = None

    def preamble(self):
        """
            Return the parts of the request line, without the line break.
        """
        return [self.method, " ", self.path, " ", self.version]

    @classmethod
    def expr(klass):
        """
            Return the grammar: a Method, the optional separator, a Path,
            and then any number of components, each optionally preceded by
            the separator.
        """
        atom = pp.MatchFirst([comp.expr() for comp in klass.comps])
        return pp.And(
            [
                Method.expr(),
                Sep,
                Path.expr(),
                pp.ZeroOrMore(Sep + atom)
            ]
        )

    def __str__(self):
        return "%s %s" % (self.method[:], self.path[:])
2012-06-24 07:12:52 +00:00
class CraftedRequest(Request):
    """
        A request built from a parsed spec.

        settings: Passed to the accept() method of every token.
        spec: The spec text; added to the result of serve().
        tokens: Parsed components, each applied to this request in order.
    """
    def __init__(self, settings, spec, tokens):
        Request.__init__(self)
        self.spec = spec
        self.tokens = tokens
        for token in tokens:
            token.accept(settings, self)

    def serve(self, fp, check, host):
        result = Request.serve(self, fp, check, host)
        result["spec"] = self.spec
        return result
2012-04-28 02:43:57 +00:00
class CraftedResponse(Response):
    """
        A response built from a parsed spec.

        settings: Passed to the accept() method of every token.
        spec: The spec text; added to the result of serve().
        tokens: Parsed components, each applied to this response in order.
    """
    def __init__(self, settings, spec, tokens):
        Response.__init__(self)
        self.spec = spec
        self.tokens = tokens
        for token in tokens:
            token.accept(settings, self)

    def serve(self, fp, check):
        # No request host is passed, so the message is served as a response.
        result = Response.serve(self, fp, check, None)
        result["spec"] = self.spec
        return result
2012-04-28 02:43:57 +00:00
class PathodErrorResponse(Response):
    """
        The response used to report an error to the client. It has code 800
        and a text/plain body.

        msg: Used as the status message.
        body: Text for the body after the "pathod error: " prefix. When
        empty, msg is used.
    """
    def __init__(self, msg, body=None):
        Response.__init__(self)
        self.code = 800
        self.msg = LiteralGenerator(msg)
        detail = body or msg
        self.body = LiteralGenerator("pathod error: " + detail)
        content_type = (
            LiteralGenerator("Content-Type"), LiteralGenerator("text/plain")
        )
        self.headers = [content_type]

    def serve(self, fp, check=None):
        result = Response.serve(self, fp, check, None)
        # Marks the entry as generated by pathod itself.
        result["internal"] = True
        return result
2012-04-28 00:42:03 +00:00
# A spec that starts with this character names a file holding the real spec.
FILESTART = "+"
def read_file(settings, s):
    """
        Return the contents of the file named by s.

        settings: Mapping that is read for "staticdir" and
        "unconstrained_file_access".
        s: The file name, preceded by one marker character that is dropped.
        The name is interpreted relative to staticdir.

        Raises FileAccessDenied if no staticdir is configured, if the path
        resolves outside staticdir while unconstrained access is off, or if
        the path is not a regular file.
    """
    uf = settings.get("unconstrained_file_access")
    sd = settings.get("staticdir")
    if not sd:
        raise FileAccessDenied("File access disabled.")
    sd = os.path.normpath(os.path.abspath(sd))
    s = s[1:]
    s = os.path.expanduser(s)
    s = os.path.normpath(os.path.abspath(os.path.join(sd, s)))
    # Compare against the directory plus a trailing separator. A bare
    # startswith(sd) would also accept sibling paths that merely share the
    # prefix, such as "/srv/static-secret" for "/srv/static".
    root = sd if sd.endswith(os.sep) else sd + os.sep
    if not uf and s != sd and not s.startswith(root):
        raise FileAccessDenied("File access outside of configured directory")
    if not os.path.isfile(s):
        raise FileAccessDenied("File not readable")
    # The with block closes the file as soon as it has been read.
    with open(s, "r") as f:
        return f.read()
def parse_response(settings, s):
    """
        Parse a response spec and return a CraftedResponse.

        settings: Passed on to read_file and to the parsed components.
        s: The spec. If it starts with FILESTART, the spec is first read
        from the file it names.

        May raise ParseException or FileAccessDenied
    """
    if s.startswith(FILESTART):
        s = read_file(settings, s)
    try:
        return CraftedResponse(settings, s, Response.expr().parseString(s, parseAll=True))
    # "as" instead of the comma form, which only Python 2 accepts.
    except pp.ParseException as v:
        raise ParseException(v.msg, v.line, v.col)
2012-06-24 05:47:55 +00:00
def parse_request(settings, s):
    """
        Parse a request spec and return a CraftedRequest.

        settings: Passed on to read_file and to the parsed components.
        s: The spec. If it starts with FILESTART, the spec is first read
        from the file it names.

        May raise ParseException or FileAccessDenied
    """
    if s.startswith(FILESTART):
        s = read_file(settings, s)
    try:
        return CraftedRequest(settings, s, Request.expr().parseString(s, parseAll=True))
    # "as" instead of the comma form, which only Python 2 accepts.
    except pp.ParseException as v:
        raise ParseException(v.msg, v.line, v.col)