From 6e153b2c017be294a23e78469367346d0f9250e2 Mon Sep 17 00:00:00 2001 From: rjt-gupta Date: Sun, 24 Feb 2019 01:45:45 +0530 Subject: [PATCH] filter unicode fix --- mitmproxy/flowfilter.py | 23 +++++++++++++++++++++++ test/mitmproxy/test_flowfilter.py | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/mitmproxy/flowfilter.py b/mitmproxy/flowfilter.py index 7f8df96f5..0d8f10622 100644 --- a/mitmproxy/flowfilter.py +++ b/mitmproxy/flowfilter.py @@ -475,7 +475,30 @@ def _make(): parts.append(f) simplerex = "".join(c for c in pp.printables if c not in "()~'\"") + alphdevanagari = pp.pyparsing_unicode.Devanagari.alphas + alphcyrillic = pp.pyparsing_unicode.Cyrillic.alphas + alphgreek = pp.pyparsing_unicode.Greek.alphas + alphchinese = pp.pyparsing_unicode.Chinese.alphas + alpharabic = pp.pyparsing_unicode.Arabic.alphas + alphhebrew = pp.pyparsing_unicode.Hebrew.alphas + alphjapanese = pp.pyparsing_unicode.Japanese.alphas + alphkorean = pp.pyparsing_unicode.Korean.alphas + alphlatin1 = pp.pyparsing_unicode.Latin1.alphas + alphlatinA = pp.pyparsing_unicode.LatinA.alphas + alphlatinB = pp.pyparsing_unicode.LatinB.alphas + rex = pp.Word(simplerex) |\ + pp.Word(alphcyrillic) |\ + pp.Word(alphgreek) |\ + pp.Word(alphchinese) |\ + pp.Word(alpharabic) |\ + pp.Word(alphdevanagari) |\ + pp.Word(alphhebrew) |\ + pp.Word(alphjapanese) |\ + pp.Word(alphkorean) |\ + pp.Word(alphlatin1) |\ + pp.Word(alphlatinA) |\ + pp.Word(alphlatinB) |\ pp.QuotedString("\"", escChar='\\') |\ pp.QuotedString("'", escChar='\\') for klass in filter_rex: diff --git a/test/mitmproxy/test_flowfilter.py b/test/mitmproxy/test_flowfilter.py index 4eb37d813..d53cec7d7 100644 --- a/test/mitmproxy/test_flowfilter.py +++ b/test/mitmproxy/test_flowfilter.py @@ -28,6 +28,9 @@ class TestParsing: self._dump(p) assert len(p.lst) == 2 + def test_non_ascii(self): + assert flowfilter.parse("~s шгн") + def test_naked_url(self): a = flowfilter.parse("foobar ~h rex") assert a.lst[0].expr == "foobar" @@ -173,10 +176,30 @@ class TestMatchingHTTPFlow: assert not self.q("~bq message", q) assert not self.q("~bq message", s) + s.response.text = 'яч' # Cyrillic + assert self.q("~bs яч", s) + s.response.text = '测试' # Chinese + assert self.q('~bs 测试', s) + s.response.text = 'ॐ' # Hindi + assert self.q('~bs ॐ', s) + s.response.text = 'لله' # Arabic + assert self.q('~bs لله', s) + s.response.text = 'θεός' # Greek + assert self.q('~bs θεός', s) + s.response.text = 'לוהים' # Hebrew + assert self.q('~bs לוהים', s) + s.response.text = '神' # Japanese + assert self.q('~bs 神', s) + s.response.text = '하나님' # Korean + assert self.q('~bs 하나님', s) + s.response.text = 'Äÿ' # Latin + assert self.q('~bs Äÿ', s) + assert not self.q("~bs nomatch", s) assert not self.q("~bs content", q) assert not self.q("~bs content", s) assert not self.q("~bs message", q) + s.response.text = 'message' assert self.q("~bs message", s) def test_body(self):