mitmproxy/libmproxy/filt.py

423 lines
9.8 KiB
Python
Raw Normal View History

2010-02-16 04:09:07 +00:00
"""
The following operators are understood:
~q Request with no response
~s Response
2012-02-10 02:04:20 +00:00
2010-02-16 04:09:07 +00:00
Headers:
Patterns are matched against "name: value" strings. Field names are
all-lowercase.
~a Asset content-type in response. Asset content types are:
text/javascript
application/x-javascript
application/javascript
text/css
image/*
application/x-shockwave-flash
2010-02-16 04:09:07 +00:00
~h rex Header line in either request or response
~hq rex Header in request
~hs rex Header in response
~b rex Expression in the body of either request or response
~bq rex Expression in the body of request
~bs rex Expression in the body of response
~t rex Shortcut for content-type header.
2012-07-06 10:33:09 +00:00
~d rex Request domain
~m rex Method
2010-02-16 04:09:07 +00:00
~u rex URL
~c CODE Response code.
2012-02-10 02:04:20 +00:00
rex Equivalent to ~u rex
2010-02-16 04:09:07 +00:00
"""
from __future__ import absolute_import
2015-05-30 00:03:28 +00:00
import re
import sys
import pyparsing as pp
2010-02-16 04:09:07 +00:00
class _Token:
    """Base class for every node of a parsed filter expression."""

    def dump(self, indent=0, fp=sys.stdout):
        """
        Write a one-line description of this node to fp.

        indent: nesting depth; the line starts with that many tab characters.
        fp: file-like object providing a write() method.

        The line holds the class name followed, when the node has an "expr"
        attribute, by that expression in parentheses.
        """
        # fp.write() is used instead of the Python 2 only "print >> fp" form.
        # The bytes written are the ones the print statement produced,
        # including the single space it emitted after an empty indent string
        # at depth zero, so existing output does not change.
        line = ("\t" * indent or " ") + self.__class__.__name__
        if hasattr(self, "expr"):
            line += " (%s)" % self.expr
        fp.write(line + "\n")
class _Action(_Token):
    """Base class for filter nodes that are built by a parse action."""

    @classmethod
    def make(klass, s, loc, toks):
        """
        Build an instance from the tokens of a match.

        The first token is dropped and the remaining ones are passed to the
        constructor as positional arguments. s and loc are accepted but not
        used.
        """
        args = toks[1:]
        return klass(*args)
class FErr(_Action):
    """Filter that matches flows carrying an error."""
    code = "e"
    help = "Match error"

    def __call__(self, f):
        """Return True if f.error is truthy, False otherwise."""
        return bool(f.error)
2010-02-16 04:09:07 +00:00
class FReq(_Action):
    """Filter that matches flows which have no response."""
    code = "q"
    help = "Match request with no response"

    def __call__(self, f):
        """Return True if f.response is falsy, False otherwise."""
        # Always return a real bool, like FErr and FResp do, instead of
        # falling off the end and returning None when a response exists.
        return not f.response
2010-02-16 04:09:07 +00:00
class FResp(_Action):
    """Filter that matches flows which have a response."""
    code = "s"
    help = "Match response"

    def __call__(self, f):
        """Return True if f.response is truthy, False otherwise."""
        return bool(f.response)
2012-02-10 02:04:20 +00:00
2010-02-16 04:09:07 +00:00
class _Rex(_Action):
    """Base class for filters that take a regular expression argument."""
    # Flags handed to re.compile(); subclasses override this as needed.
    flags = 0

    def __init__(self, expr):
        """
        Store expr and compile it with the class's flags.

        The source text is kept as self.expr and the compiled pattern as
        self.re.

        Raises ValueError if the expression cannot be compiled.
        """
        self.expr = expr
        try:
            self.re = re.compile(self.expr, self.flags)
        except Exception:
            # Exception rather than a bare except: any compile failure is
            # still reported as ValueError, but KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            raise ValueError("Cannot compile expression.")
2010-02-16 04:09:07 +00:00
2010-02-16 04:09:07 +00:00
def _check_content_type(expr, o):
    """
    Tell whether the "content-type" header of o matches expr.

    expr: regular expression, applied with re.search().
    o: object whose headers attribute provides get().

    Returns True when the header value is truthy and expr is found in it,
    False otherwise.
    """
    content_type = o.headers.get("content-type")
    return bool(content_type and re.search(expr, content_type))
2012-02-10 02:04:20 +00:00
2010-02-16 04:09:07 +00:00
class FAsset(_Action):
    """Filter that matches responses whose content type is an asset type."""
    code = "a"
    help = "Match asset in response: CSS, Javascript, Flash, images."
    # Regular expressions tried, in order, against the response content type.
    ASSET_TYPES = [
        "text/javascript",
        "application/x-javascript",
        "application/javascript",
        "text/css",
        "image/.*",
        "application/x-shockwave-flash"
    ]

    def __call__(self, f):
        """Return True if f has a response matching one of ASSET_TYPES."""
        if not f.response:
            return False
        return any(
            _check_content_type(pattern, f.response)
            for pattern in self.ASSET_TYPES
        )
2010-02-16 04:09:07 +00:00
class FContentType(_Rex):
    """Filter on the content type of either the request or the response."""
    code = "t"
    help = "Content-type header"

    def __call__(self, f):
        """
        Return True if the expression matches the content type of the
        request or, when the flow has a response, of the response.
        """
        if _check_content_type(self.expr, f.request):
            return True
        # The response is only inspected when the request did not match.
        return bool(f.response) and _check_content_type(self.expr, f.response)
2010-02-16 04:09:07 +00:00
class FRequestContentType(_Rex):
    """Filter on the content type of the request."""
    code = "tq"
    help = "Request Content-Type header"

    def __call__(self, f):
        """Return True if the expression matches the request content type."""
        request = f.request
        return _check_content_type(self.expr, request)
2010-02-16 04:09:07 +00:00
class FResponseContentType(_Rex):
    """Filter on the content type of the response."""
    code = "ts"
    help = "Response Content-Type header"

    def __call__(self, f):
        """
        Return True if f has a response whose content type matches the
        expression, False otherwise.
        """
        if not f.response:
            return False
        return _check_content_type(self.expr, f.response)
2010-02-16 04:09:07 +00:00
class FHead(_Rex):
    """Filter on the headers of either the request or the response."""
    code = "h"
    help = "Header"
    flags = re.MULTILINE

    def __call__(self, f):
        """
        Return True if the expression is found in str() of the request
        headers or of the response headers, False otherwise.
        """
        def matches(message):
            return bool(message and self.re.search(str(message.headers)))

        # "or" keeps the original order: the response is only looked at when
        # the request did not match.
        return matches(f.request) or matches(f.response)
2012-02-10 02:04:20 +00:00
2010-02-16 04:09:07 +00:00
class FHeadRequest(_Rex):
    """Filter on the headers of the request."""
    code = "hq"
    help = "Request header"
    flags = re.MULTILINE

    def __call__(self, f):
        """
        Return True if f has a request and the expression is found in str()
        of its headers, False otherwise.
        """
        # bool() gives an explicit False on a miss instead of the implicit
        # None returned before, matching the filters that return a bool.
        return bool(f.request and self.re.search(str(f.request.headers)))
2010-02-16 04:09:07 +00:00
class FHeadResponse(_Rex):
    """Filter on the headers of the response."""
    code = "hs"
    help = "Response header"
    flags = re.MULTILINE

    def __call__(self, f):
        """
        Return True if f has a response and the expression is found in str()
        of its headers, False otherwise.
        """
        # bool() gives an explicit False on a miss instead of the implicit
        # None returned before, matching the filters that return a bool.
        return bool(f.response and self.re.search(str(f.response.headers)))
2010-02-16 04:09:07 +00:00
class FBod(_Rex):
    """Filter on the body of either the request or the response."""
    code = "b"
    help = "Body"

    def __call__(self, f):
        """
        Return True if the expression is found in the decoded content of the
        request or of the response, False otherwise. A message with falsy
        content is skipped.
        """
        def matches(message):
            return bool(
                message and
                message.content and
                self.re.search(message.get_decoded_content())
            )

        # "or" keeps the original order: the response is only looked at when
        # the request did not match.
        return matches(f.request) or matches(f.response)
class FBodRequest(_Rex):
    """Filter on the body of the request."""
    code = "bq"
    help = "Request body"

    def __call__(self, f):
        """
        Return True if f has a request with truthy content and the
        expression is found in its decoded content, False otherwise.
        """
        # bool() gives an explicit False on a miss instead of the implicit
        # None returned before, matching FBod.
        return bool(
            f.request and
            f.request.content and
            self.re.search(f.request.get_decoded_content())
        )
2010-02-16 04:09:07 +00:00
class FBodResponse(_Rex):
    """Filter on the body of the response."""
    code = "bs"
    help = "Response body"

    def __call__(self, f):
        """
        Return True if f has a response with truthy content and the
        expression is found in its decoded content, False otherwise.
        """
        # bool() gives an explicit False on a miss instead of the implicit
        # None returned before, matching FBod.
        return bool(
            f.response and
            f.response.content and
            self.re.search(f.response.get_decoded_content())
        )
class FMethod(_Rex):
    """Case-insensitive filter on the request method."""
    code = "m"
    help = "Method"
    flags = re.IGNORECASE

    def __call__(self, f):
        """Return True if the expression is found in f.request.method."""
        found = self.re.search(f.request.method)
        return found is not None
2010-02-16 04:09:07 +00:00
2012-07-06 10:21:44 +00:00
class FDomain(_Rex):
    """Case-insensitive filter on the request host."""
    code = "d"
    help = "Domain"
    flags = re.IGNORECASE

    def __call__(self, f):
        """Return True if the expression is found in f.request.host."""
        found = self.re.search(f.request.host)
        return found is not None
2012-07-06 10:21:44 +00:00
2010-02-16 04:09:07 +00:00
class FUrl(_Rex):
    """Filter on the request URL."""
    code = "u"
    help = "URL"

    # FUrl is special, because it can be "naked".
    @classmethod
    def make(klass, s, loc, toks):
        """
        Build an instance from the tokens of a match.

        When there is more than one token the first one is dropped; a single
        token is used as the expression directly. s and loc are accepted but
        not used.
        """
        if len(toks) > 1:
            toks = toks[1:]
        return klass(*toks)

    def __call__(self, f):
        """Return True if the expression is found in f.request.url."""
        # Return a bool rather than the raw match object / None, in line
        # with FMethod and FDomain.
        return bool(self.re.search(f.request.url))
2010-02-16 04:09:07 +00:00
class FSrc(_Rex):
    """Filter on the address of the client connection."""
    code = "src"
    help = "Match source address"

    def __call__(self, f):
        """
        Return True if f.client_conn.address is truthy and the expression is
        found in its repr(), False otherwise.
        """
        # Return a bool rather than leaking the address or the match object
        # to the caller, in line with FMethod and FDomain.
        address = f.client_conn.address
        return bool(address and self.re.search(repr(address)))
class FDst(_Rex):
    """Filter on the address of the server connection."""
    code = "dst"
    help = "Match destination address"

    def __call__(self, f):
        """
        Return True if f.server_conn.address is truthy and the expression is
        found in its repr(), False otherwise.
        """
        # Return a bool rather than leaking the address or the match object
        # to the caller, in line with FMethod and FDomain.
        address = f.server_conn.address
        return bool(address and self.re.search(repr(address)))
2010-02-16 04:09:07 +00:00
class _Int(_Action):
    """Base class for filters that take an integer argument."""

    def __init__(self, num):
        """Convert num with int() and keep the result as self.num."""
        number = int(num)
        self.num = number
class FCode(_Int):
    """Filter on the status code of the response."""
    code = "c"
    help = "HTTP response code"

    def __call__(self, f):
        """
        Return True if f has a response whose status_code equals self.num,
        False otherwise.
        """
        # bool() gives an explicit False on a miss instead of the implicit
        # None returned before.
        return bool(f.response and f.response.status_code == self.num)
2010-02-16 04:09:07 +00:00
class FAnd(_Token):
    """Conjunction node: matches when every child filter matches."""

    def __init__(self, lst):
        """lst: the child filters."""
        self.lst = lst

    def dump(self, indent=0, fp=sys.stdout):
        """
        Write this node's class name to fp, then dump every child one level
        deeper.
        """
        # fp.write() replaces the Python 2 only "print >> fp" form. The
        # output bytes are unchanged, including the single space print
        # emitted after an empty indent string at depth zero.
        fp.write(("\t" * indent or " ") + self.__class__.__name__ + "\n")
        for i in self.lst:
            i.dump(indent + 1, fp)

    def __call__(self, f):
        """Return True if all child filters accept f."""
        return all(i(f) for i in self.lst)
2010-02-16 04:09:07 +00:00
class FOr(_Token):
    """Disjunction node: matches when at least one child filter matches."""

    def __init__(self, lst):
        """lst: the child filters."""
        self.lst = lst

    def dump(self, indent=0, fp=sys.stdout):
        """
        Write this node's class name to fp, then dump every child one level
        deeper.
        """
        # fp.write() replaces the Python 2 only "print >> fp" form. The
        # output bytes are unchanged, including the single space print
        # emitted after an empty indent string at depth zero.
        fp.write(("\t" * indent or " ") + self.__class__.__name__ + "\n")
        for i in self.lst:
            i.dump(indent + 1, fp)

    def __call__(self, f):
        """Return True if any child filter accepts f."""
        return any(i(f) for i in self.lst)
2010-02-16 04:09:07 +00:00
class FNot(_Token):
    """Negation node: matches when its child filter does not."""

    def __init__(self, itm):
        """itm: sequence whose first element is the filter to negate."""
        self.itm = itm[0]

    def dump(self, indent=0, fp=sys.stdout):
        """
        Write this node's class name to fp, then dump the child one level
        deeper.
        """
        # fp.write() replaces the Python 2 only "print >> fp" form. The
        # output bytes are unchanged, including the single space print
        # emitted after an empty indent string at depth zero.
        fp.write(("\t" * indent or " ") + self.__class__.__name__ + "\n")
        self.itm.dump(indent + 1, fp)

    def __call__(self, f):
        """Return the logical negation of the child filter's result for f."""
        return not self.itm(f)
2010-02-16 04:09:07 +00:00
# Filter classes grouped by the kind of argument their operator takes.
# Within a list the order is the order in which the grammar tries them.

# Operators without an argument.
filt_unary = [
    FReq,
    FResp,
    FAsset,
    FErr,
]

# Operators followed by a regular expression.
filt_rex = [
    FHeadRequest,
    FHeadResponse,
    FHead,
    FBodRequest,
    FBodResponse,
    FBod,
    FMethod,
    FDomain,
    FUrl,
    FRequestContentType,
    FResponseContentType,
    FContentType,
    FSrc,
    FDst,
]

# Operators followed by an integer.
filt_int = [
    FCode,
]
2015-05-30 00:03:28 +00:00
2010-02-16 04:09:07 +00:00
def _make():
    """
    Build and return the pyparsing grammar for filter expressions.

    Atoms are tried in this order: the operators without argument, the
    operators taking a regex, the operators taking an integer and finally a
    bare regex, which is turned into a URL filter. Atoms are combined with
    "!", "&" and "|". When the input holds several expressions in a row they
    are wrapped in a single FAnd.
    """
    def operator(klass):
        # The "~code" literal followed by a word end.
        return pp.Literal("~%s" % klass.code) + pp.WordEnd()

    def with_action(element, action):
        # Attach the parse action and hand the element back for collection.
        element.setParseAction(action)
        return element

    # A regex argument is either an unquoted word made of printable
    # characters other than parentheses, "~" and quotes, or a quoted string
    # with backslash as the escape character.
    unquoted = "".join(c for c in pp.printables if c not in "()~'\"")
    rex = (
        pp.Word(unquoted) |
        pp.QuotedString("\"", escChar='\\') |
        pp.QuotedString("'", escChar='\\')
    )

    # Order is important - multi-char expressions need to come before narrow
    # ones.
    parts = [with_action(operator(k), k.make) for k in filt_unary]
    parts += [
        with_action(operator(k) + rex.copy(), k.make) for k in filt_rex
    ]
    parts += [
        with_action(operator(k) + pp.Word(pp.nums), k.make) for k in filt_int
    ]
    # A naked rex is a URL rex:
    parts.append(with_action(rex.copy(), FUrl.make))
    atom = pp.MatchFirst(parts)

    operators = [
        (pp.Literal("!").suppress(), 1, pp.opAssoc.RIGHT, lambda x: FNot(*x)),
        (pp.Literal("&").suppress(), 2, pp.opAssoc.LEFT, lambda x: FAnd(*x)),
        (pp.Literal("|").suppress(), 2, pp.opAssoc.LEFT, lambda x: FOr(*x)),
    ]
    expr = pp.OneOrMore(pp.operatorPrecedence(atom, operators))
    return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)
# The grammar is built once, when the module is imported.
bnf = _make()


def parse(s):
    """
    Parse the filter expression s.

    Returns the resulting filter object, with s stored on its "pattern"
    attribute, or None when pyparsing rejects the input or a ValueError is
    raised while the filter is being built.
    """
    try:
        filt = bnf.parseString(s, parseAll=True)[0]
        filt.pattern = s
        return filt
    except (pp.ParseException, ValueError):
        return None
2010-02-16 04:09:07 +00:00
2014-12-26 02:10:24 +00:00
# Help table: (syntax, description) pairs. The operator entries are sorted;
# the fixed entries for the combinators are appended after them.
help = sorted(
    [("~%s" % i.code, i.help) for i in filt_unary] +
    [("~%s regex" % i.code, i.help) for i in filt_rex] +
    [("~%s int" % i.code, i.help) for i in filt_int]
)
help += [
    ("!", "unary not"),
    ("&", "and"),
    ("|", "or"),
    ("(...)", "grouping"),
]