mitmproxy/pathod/language/base.py

580 lines
15 KiB
Python
Raw Normal View History

2014-10-25 01:24:05 +00:00
import operator
import os
import abc
2016-10-17 02:43:38 +00:00
import functools
import pyparsing as pp
from mitmproxy.utils import strutils
2016-10-19 20:45:18 +00:00
from mitmproxy.utils import human
import typing # noqa
from . import generators, exceptions
2012-04-28 00:42:03 +00:00
2016-05-07 18:14:39 +00:00
2016-10-17 04:29:45 +00:00
class Settings:
    """
        Plain holder for the options passed to tokens when they are frozen,
        resolved or rendered.

        Within this module only staticdir and unconstrained_file_access are
        read (by TokValueFile.get_generator); the remaining attributes are
        stored verbatim for use elsewhere.
    """

    def __init__(
        self,
        is_client=False,
        staticdir=None,
        unconstrained_file_access=False,
        request_host=None,
        websocket_key=None,
        protocol=None,
    ):
        # File access options.
        self.staticdir = staticdir
        self.unconstrained_file_access = unconstrained_file_access

        # Connection/message related options.
        self.is_client = is_client
        self.request_host = request_host
        self.protocol = protocol
        # TODO: refactor this into the protocol
        self.websocket_key = websocket_key
# Optional ":" separator; matched but dropped from the parse results.
Sep = pp.Optional(pp.Literal(":")).suppress()


# A run of digits, converted to an int by the parse action.
v_integer = (
    pp.Word(pp.nums)
    .setName("integer")
    .setParseAction(lambda toks: int(toks[0]))
)


# A double- or single-quoted string (may span lines); quotes are stripped.
v_literal = pp.MatchFirst([
    pp.QuotedString("\"", unquoteResults=True, multiline=True),
    pp.QuotedString("'", unquoteResults=True, multiline=True),
])


# Either a quoted literal, or an unquoted word made of printable characters
# excluding comma, colon, newline, "@" and both quote characters.
v_naked_literal = pp.MatchFirst([
    v_literal,
    pp.Word("".join(i for i in pp.printables if i not in ",:\n@\'\"")),
])
2016-10-17 04:29:45 +00:00
class Token:

    """
        A token in the specification language. Tokens are immutable. The token
        classes have no meaning in and of themselves, and are combined into
        Components and Actions to build the language.
    """
    # NOTE(review): this is the Python 2 spelling of a metaclass declaration;
    # under Python 3 it is an ordinary class attribute, so the abstractmethod
    # marker below is not enforced at instantiation time.
    __metaclass__ = abc.ABCMeta

    @classmethod
    def expr(cls):  # pragma: no cover
        """
            The parse expression for this token. The base implementation
            returns None.
        """
        return None

    @abc.abstractmethod
    def spec(self):  # pragma: no cover
        """
            A parseable specification string for this token. Also used as
            the token's repr().
        """
        return None

    @property
    def unique_name(self) -> typing.Optional[str]:
        """
            Controls uniqueness constraints for tokens. No two tokens with the
            same name will be allowed. If no uniqueness should be applied, this
            should be None.

            Defaults to the lower-cased class name.
        """
        return type(self).__name__.lower()

    def resolve(self, settings_, msg_):
        """
            Resolves this token to ready it for transmission. This means that
            the calculated offsets of actions are fixed.

            settings: a language.Settings instance
            msg: The containing message

            The base implementation ignores both arguments and returns self.
        """
        return self

    def __repr__(self):
        return self.spec()
2012-10-28 09:00:19 +00:00
class _TokValueLiteral(Token):
    """
        Shared base for literal values. The escaped string given at
        construction is converted to bytes once and stored in self.val.
    """

    def __init__(self, val):
        # Unescape up front so that self.val always holds raw bytes.
        raw = strutils.escaped_str_to_bytes(val)
        self.val = raw

    def get_generator(self, settings_):
        """
            Return the literal bytes; the settings are not consulted.
        """
        return self.val

    def freeze(self, settings_):
        """
            A literal is already fixed, so freezing returns the same object.
        """
        return self
class TokValueLiteral(_TokValueLiteral):

    """
        A literal with Python-style string escaping
    """

    @classmethod
    def expr(cls):
        """
            A copy of the quoted-literal expression whose parse action builds
            an instance of this class.
        """
        return v_literal.copy().setParseAction(cls.parseAction)

    @classmethod
    def parseAction(cls, x):
        """
            Parse action: construct the token from the matched tokens.
        """
        return cls(*x)

    def spec(self):
        """
            The value as a single-quoted, escaped string. Single quotes in
            the value are written as \\x27 so they cannot end the literal.
        """
        escaped = strutils.bytes_to_escaped_str(self.val)
        return "'" + escaped.replace(r"'", r"\x27") + "'"
class TokValueNakedLiteral(_TokValueLiteral):
    """
        A literal that may be written without surrounding quotes.
    """

    @classmethod
    def expr(cls):
        """
            A copy of the naked-literal expression whose parse action builds
            an instance of this class.
        """
        grammar = v_naked_literal.copy()
        grammar.setParseAction(lambda x: cls(*x))
        return grammar

    def spec(self):
        """
            The escaped value, written without enclosing quotes.
        """
        return strutils.bytes_to_escaped_str(
            self.val,
            escape_single_quotes=True,
        )
2012-04-28 00:42:03 +00:00
class TokValueGenerate(Token):
    """
        A generated value, written as "@<size>[unit][,datatype]".

        usize: the size as parsed, before applying the unit
        unit: a key of human.SIZE_UNITS; a missing unit is stored as "b"
        datatype: a key of generators.DATATYPES
    """

    def __init__(self, usize, unit, datatype):
        self.usize = usize
        self.unit = unit if unit else "b"
        self.datatype = datatype

    def bytes(self):
        """
            The size multiplied by the multiplier for the configured unit.
        """
        multiplier = human.SIZE_UNITS[self.unit]
        return self.usize * multiplier

    def get_generator(self, settings_):
        """
            A RandomGenerator for the configured datatype and size. The
            settings are not consulted.
        """
        return generators.RandomGenerator(self.datatype, self.bytes())

    def freeze(self, settings):
        """
            Materialise the generator once and return the resulting data as a
            TokValueLiteral.
        """
        data = self.get_generator(settings)[:]
        return TokValueLiteral(
            strutils.bytes_to_escaped_str(data, escape_single_quotes=True)
        )

    @classmethod
    def expr(cls):
        """
            Grammar: "@" integer, an optional unit directly after the number,
            and an optional ",datatype" which defaults to "bytes".
        """
        grammar = pp.Literal("@").suppress() + v_integer

        # The unit must follow the number without intervening whitespace.
        units = functools.reduce(
            operator.or_,
            [pp.Literal(name) for name in human.SIZE_UNITS]
        ).leaveWhitespace()
        grammar = grammar + pp.Optional(units, default=None)

        datatype = pp.Literal(",").suppress()
        datatype += functools.reduce(
            operator.or_,
            [pp.Literal(name) for name in generators.DATATYPES]
        )
        grammar += pp.Optional(datatype, default="bytes")
        return grammar.setParseAction(lambda x: cls(*x))

    def spec(self):
        """
            The shortest specification: the default unit "b" and the default
            datatype "bytes" are omitted.
        """
        pieces = ["@%s" % self.usize]
        if self.unit != "b":
            pieces.append(self.unit)
        if self.datatype != "bytes":
            pieces.append(",%s" % self.datatype)
        return "".join(pieces)
2012-04-28 00:42:03 +00:00
class TokValueFile(Token):
    """
        A value read from a file, written as "<path".

        The path is interpreted relative to settings.staticdir when the
        generator is requested.
    """

    def __init__(self, path):
        self.path = str(path)

    @classmethod
    def expr(cls):
        """
            Grammar: "<" followed by a (possibly quoted) literal path.
        """
        e = pp.Literal("<").suppress()
        e = e + v_naked_literal
        return e.setParseAction(lambda x: cls(*x))

    def freeze(self, settings_):
        """
            File values are not expanded when frozen; returns self.
        """
        return self

    def get_generator(self, settings):
        """
            Return a FileGenerator for the referenced file.

            settings: a Settings instance; staticdir and
            unconstrained_file_access are read.

            Raises exceptions.FileAccessDenied if file access is disabled
            (no staticdir), if the resolved path lies outside staticdir while
            unconstrained access is off, or if the path is not a regular file.
        """
        if not settings.staticdir:
            raise exceptions.FileAccessDenied("File access disabled.")

        # Resolve both sides to absolute, normalised paths so they can be
        # compared component-wise. (Previously only the target was made
        # absolute, so a relative staticdir could never match.)
        base = os.path.normpath(os.path.abspath(settings.staticdir))
        s = os.path.expanduser(self.path)
        s = os.path.normpath(os.path.abspath(os.path.join(base, s)))

        if not settings.unconstrained_file_access:
            # A plain string-prefix test would accept sibling directories
            # such as "/srv/static-secret" for a staticdir of "/srv/static";
            # commonpath compares whole path components instead.
            try:
                inside = os.path.commonpath([base, s]) == base
            except ValueError:
                # Raised e.g. for paths on different drives on Windows.
                inside = False
            if not inside:
                raise exceptions.FileAccessDenied(
                    "File access outside of configured directory"
                )

        if not os.path.isfile(s):
            raise exceptions.FileAccessDenied("File not readable")
        return generators.FileGenerator(s)

    def spec(self):
        return "<'%s'" % self.path
2012-04-28 00:42:03 +00:00
# A value: generated data, file contents, or a quoted literal.
TokValue = pp.MatchFirst([
    TokValueGenerate.expr(),
    TokValueFile.expr(),
    TokValueLiteral.expr(),
])


# As TokValue, but additionally accepts an unquoted literal as a last resort.
TokNakedValue = pp.MatchFirst([
    TokValueGenerate.expr(),
    TokValueFile.expr(),
    TokValueLiteral.expr(),
    TokValueNakedLiteral.expr(),
])


# An offset: an integer, or one of the single-letter markers "r" and "a".
TokOffset = pp.MatchFirst([
    v_integer,
    pp.Literal("r"),
    pp.Literal("a"),
])
class _Component(Token):

    """
        A value component of the primary specification of a message.
        Components produce byte values describing the bytes of the message.
    """

    def values(self, settings):  # pragma: no cover
        """
           A sequence of values, which can either be strings or generators.
        """
        pass

    def string(self, settings=None):
        """
            A bytestring representation of the object.

            settings: a Settings instance; when omitted, a default Settings()
            is used.
        """
        # Tokens read settings by attribute (e.g. settings.staticdir), so the
        # fallback must be a Settings object rather than a dict, which would
        # fail with AttributeError instead of a FileAccessDenied.
        settings = settings or Settings()
        return b"".join(i[:] for i in self.values(settings))
2015-05-02 10:32:57 +00:00
class KeyValue(_Component):

    """
        A key/value pair, written as "<preamble><key>=<value>".

        cls.preamble: leader (not defined here; read from the class)
    """

    def __init__(self, key, value):
        self.key = key
        self.value = value

    @classmethod
    def expr(cls):
        """
            Grammar: the preamble, a value for the key, "=", and a value for
            the value. Preamble and "=" are dropped from the results.
        """
        grammar = pp.Literal(cls.preamble).suppress()
        grammar += TokValue
        grammar += pp.Literal("=").suppress()
        grammar += TokValue
        return grammar.setParseAction(lambda x: cls(*x))

    def spec(self):
        key_spec = self.key.spec()
        value_spec = self.value.spec()
        return "%s%s=%s" % (self.preamble, key_spec, value_spec)

    def freeze(self, settings):
        """
            A new instance of the same class with key and value frozen.
        """
        frozen_key = self.key.freeze(settings)
        frozen_value = self.value.freeze(settings)
        return self.__class__(frozen_key, frozen_value)
class CaselessLiteral(_Component):

    """
        A caseless token that can take only one value.

        cls.TOK: the token text (not defined here; read from the class)
    """

    def __init__(self, value):
        self.value = value

    @classmethod
    def expr(cls):
        """
            Grammar: cls.TOK matched case-insensitively.
        """
        literal = pp.CaselessLiteral(cls.TOK)
        return literal.setParseAction(lambda x: cls(*x))

    def values(self, settings):
        # NOTE(review): returns TOK itself rather than a list containing it,
        # unlike the other components in this module - confirm with callers.
        return self.TOK

    def spec(self):
        return self.TOK

    def freeze(self, settings_):
        """
            Nothing to freeze; returns self.
        """
        return self
class OptionsOrValue(_Component):

    """
        Can be any of a specified set of options, or a value specifier.
    """
    preamble = ""
    options = []  # type: typing.List[str]

    def __init__(self, value):
        # A plain string means the parser matched one of the options. It is
        # replaced by a literal carrying the option exactly as spelled in
        # cls.options, whatever case the user typed. The user can specify a
        # different case by using a string value literal.
        self.option_used = False
        if isinstance(value, str):
            wanted = value.lower()
            # First option that matches case-insensitively, if any.
            match = next(
                (opt for opt in self.options if opt.lower() == wanted),
                None
            )
            if match is not None:
                self.option_used = True
                value = TokValueLiteral(match)
        self.value = value

    @classmethod
    def expr(cls):
        """
            Grammar: one of the options (case-insensitive) or a value,
            preceded by the preamble when one is set.
        """
        choices = pp.MatchFirst([pp.CaselessLiteral(i) for i in cls.options])
        grammar = choices | TokValue.copy()
        grammar = grammar.setParseAction(lambda x: cls(*x))
        if not cls.preamble:
            return grammar
        return pp.Literal(cls.preamble).suppress() + grammar

    def values(self, settings):
        generator = self.value.get_generator(settings)
        return [generator]

    def spec(self):
        """
            The value's spec after the preamble. If the spec with its first
            and last character removed, lower-cased, is one of the options,
            that bare lower-cased form is used instead.
        """
        s = self.value.spec()
        bare = s[1:-1].lower()
        if bare in self.options:
            s = bare
        return "%s%s" % (self.preamble, s)

    def freeze(self, settings):
        """
            A new instance of the same class wrapping the frozen value.
        """
        frozen = self.value.freeze(settings)
        return self.__class__(frozen)
2012-06-24 05:23:37 +00:00
class Integer(_Component):
    """
        An integer component with optional inclusive bounds and an optional
        preamble. The value is stored as ASCII-encoded bytes.
    """
    bounds = (None, None)  # type: typing.Tuple[typing.Union[int, None], typing.Union[int , None]]
    preamble = ""

    def __init__(self, value):
        v = int(value)
        lower, upper = self.bounds
        # A bound of None means "unbounded" on that side.
        too_small = lower is not None and v < lower
        too_large = upper is not None and v > upper
        if too_small or too_large:
            raise exceptions.ParseException(
                "Integer value must be between %s and %s." % self.bounds,
                0, 0
            )
        self.value = str(value).encode()

    @classmethod
    def expr(cls):
        """
            Grammar: an integer, preceded by the preamble when one is set.
        """
        grammar = v_integer.copy()
        if cls.preamble:
            grammar = pp.Literal(cls.preamble).suppress() + grammar
        return grammar.setParseAction(lambda x: cls(*x))

    def values(self, settings):
        return [self.value]

    def spec(self):
        text = self.value.decode()
        return "%s%s" % (self.preamble, text)

    def freeze(self, settings_):
        """
            Nothing to freeze; returns self.
        """
        return self
2012-10-28 21:00:41 +00:00
2015-05-03 00:54:25 +00:00
class Value(_Component):

    """
        A value component lead by an optional preamble.
    """
    preamble = ""

    def __init__(self, value):
        self.value = value

    @classmethod
    def expr(cls):
        """
            Grammar: TokValue or, failing that, TokNakedValue, preceded by the
            preamble when one is set.
        """
        grammar = (TokValue | TokNakedValue)
        if cls.preamble:
            leader = pp.Literal(cls.preamble).suppress()
            grammar = leader + grammar
        return grammar.setParseAction(lambda x: cls(*x))

    def values(self, settings):
        generator = self.value.get_generator(settings)
        return [generator]

    def spec(self):
        return "%s%s" % (self.preamble, self.value.spec())

    def freeze(self, settings):
        """
            A new instance of the same class wrapping the frozen value.
        """
        frozen = self.value.freeze(settings)
        return self.__class__(frozen)
2015-05-03 01:54:52 +00:00
class FixedLengthValue(Value):

    """
        A Value whose length must equal cls.length. The length is checked at
        construction when it can be determined with default settings, and
        again whenever the values are produced.
    """
    preamble = ""
    length = None  # type: typing.Optional[int]

    def __init__(self, value):
        Value.__init__(self, value)
        try:
            lenguess = len(value.get_generator(Settings()))
        except exceptions.RenderError:
            # The length is not available yet; values() checks it later.
            return
        # This check will fail if we know the length upfront
        if lenguess != self.length:
            raise exceptions.RenderError(
                "Invalid value length: '%s' is %s bytes, should be %s." % (
                    self.spec(),
                    lenguess,
                    self.length
                )
            )

    def values(self, settings):
        """
            As Value.values, but raises exceptions.RenderError when the total
            length of the produced values differs from cls.length.
        """
        ret = Value.values(self, settings)
        total = sum(len(chunk) for chunk in ret)
        # This check will fail if we don't know the length upfront - i.e. for
        # file inputs
        if total == self.length:
            return ret
        raise exceptions.RenderError(
            "Invalid value length: '%s' is %s bytes, should be %s." % (
                self.spec(),
                total,
                self.length
            )
        )
class Boolean(_Component):

    """
        A boolean flag.
            name  = true
            -name = false
    """
    name = ""

    def __init__(self, value):
        self.value = value

    @classmethod
    def expr(cls):
        """
            Grammar: an optional "-" followed by cls.name. The flag is False
            when the "-" is present and True otherwise.
        """
        grammar = pp.Optional(pp.Literal("-"), default=True)
        grammar += pp.Literal(cls.name).suppress()

        def parse(s_, loc_, toks):
            # toks[0] is "-" when the prefix matched, else the default True.
            return cls(toks[0] != "-")

        return grammar.setParseAction(parse)

    def spec(self):
        prefix = "" if self.value else "-"
        return "%s%s" % (prefix, self.name)
2015-05-03 01:54:52 +00:00
class IntField(_Component):

    """
        An integer field, where values can optionally be specified by name.

        cls.names maps names to integers, cls.max is the largest accepted
        value, and cls.preamble is an optional leader.
    """
    names = {}  # type: typing.Dict[str, int]
    max = 16
    preamble = ""

    def __init__(self, value):
        # Keep what was given for spec(); translate known names to integers.
        self.origvalue = value
        self.value = self.names.get(value, value)
        if self.value > self.max:
            raise exceptions.ParseException(
                "Value can't exceed %s" % self.max, 0, 0
            )

    @classmethod
    def expr(cls):
        """
            Grammar: one of the names (case-insensitive) or an integer,
            preceded by the preamble when one is set.
        """
        named = pp.MatchFirst([pp.CaselessLiteral(i) for i in cls.names])
        grammar = named | v_integer.copy()
        grammar = grammar.setParseAction(lambda x: cls(*x))
        if not cls.preamble:
            return grammar
        return pp.Literal(cls.preamble).suppress() + grammar

    def values(self, settings):
        # NOTE(review): this yields a str, whereas Integer.values() yields
        # bytes - confirm whether any caller joins these as bytes.
        return [str(self.value)]

    def spec(self):
        return "%s%s" % (self.preamble, self.origvalue)
class NestedMessage(Token):

    """
        A nested message, as an escaped string with a preamble.

        cls.nest_type is the class used to parse the nested specification;
        the parsed result is kept in self.parsed.
    """
    preamble = ""
    nest_type = None  # type: ignore

    def __init__(self, value):
        Token.__init__(self)
        self.value = value
        try:
            # The literal's bytes are decoded and must parse completely as a
            # nest_type specification.
            text = value.val.decode()
            tokens = self.nest_type.expr().parseString(text, parseAll=True)
            self.parsed = self.nest_type(tokens)
        except pp.ParseException as v:
            raise exceptions.ParseException(v.msg, v.line, v.col)

    @classmethod
    def expr(cls):
        """
            Grammar: the preamble followed by a quoted literal.
        """
        grammar = pp.Literal(cls.preamble).suppress()
        grammar = grammar + TokValueLiteral.expr()
        return grammar.setParseAction(lambda x: cls(*x))

    def values(self, settings):
        generator = self.value.get_generator(settings)
        return [
            generator,
        ]

    def spec(self):
        return "%s%s" % (self.preamble, self.value.spec())

    def freeze(self, settings):
        """
            Freeze the parsed nested message and wrap its spec, escaped, in a
            new instance of the same class.
        """
        frozen_spec = self.parsed.freeze(settings).spec()
        escaped = strutils.bytes_to_escaped_str(
            frozen_spec.encode(),
            escape_single_quotes=True
        )
        return self.__class__(TokValueLiteral(escaped))