Merge pull request #3486 from rjt-gupta/unicode-filter

filter unicode fix
This commit is contained in:
Thomas Kriechbaumer 2019-09-28 11:44:15 +02:00 committed by GitHub
commit 76bd3ef82d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 0 deletions

View File

@ -475,7 +475,30 @@ def _make():
parts.append(f) parts.append(f)
simplerex = "".join(c for c in pp.printables if c not in "()~'\"") simplerex = "".join(c for c in pp.printables if c not in "()~'\"")
alphdevanagari = pp.pyparsing_unicode.Devanagari.alphas
alphcyrillic = pp.pyparsing_unicode.Cyrillic.alphas
alphgreek = pp.pyparsing_unicode.Greek.alphas
alphchinese = pp.pyparsing_unicode.Chinese.alphas
alpharabic = pp.pyparsing_unicode.Arabic.alphas
alphhebrew = pp.pyparsing_unicode.Hebrew.alphas
alphjapanese = pp.pyparsing_unicode.Japanese.alphas
alphkorean = pp.pyparsing_unicode.Korean.alphas
alphlatin1 = pp.pyparsing_unicode.Latin1.alphas
alphlatinA = pp.pyparsing_unicode.LatinA.alphas
alphlatinB = pp.pyparsing_unicode.LatinB.alphas
rex = pp.Word(simplerex) |\ rex = pp.Word(simplerex) |\
pp.Word(alphcyrillic) |\
pp.Word(alphgreek) |\
pp.Word(alphchinese) |\
pp.Word(alpharabic) |\
pp.Word(alphdevanagari) |\
pp.Word(alphhebrew) |\
pp.Word(alphjapanese) |\
pp.Word(alphkorean) |\
pp.Word(alphlatin1) |\
pp.Word(alphlatinA) |\
pp.Word(alphlatinB) |\
pp.QuotedString("\"", escChar='\\') |\ pp.QuotedString("\"", escChar='\\') |\
pp.QuotedString("'", escChar='\\') pp.QuotedString("'", escChar='\\')
for klass in filter_rex: for klass in filter_rex:

View File

@ -28,6 +28,9 @@ class TestParsing:
self._dump(p) self._dump(p)
assert len(p.lst) == 2 assert len(p.lst) == 2
def test_non_ascii(self):
assert flowfilter.parse("~s шгн")
def test_naked_url(self): def test_naked_url(self):
a = flowfilter.parse("foobar ~h rex") a = flowfilter.parse("foobar ~h rex")
assert a.lst[0].expr == "foobar" assert a.lst[0].expr == "foobar"
@ -173,10 +176,30 @@ class TestMatchingHTTPFlow:
assert not self.q("~bq message", q) assert not self.q("~bq message", q)
assert not self.q("~bq message", s) assert not self.q("~bq message", s)
s.response.text = 'яч' # Cyrillic
assert self.q("~bs яч", s)
s.response.text = '测试' # Chinese
assert self.q('~bs 测试', s)
s.response.text = 'ॐ' # Hindi
assert self.q('~bs ॐ', s)
s.response.text = 'لله' # Arabic
assert self.q('~bs لله', s)
s.response.text = 'θεός' # Greek
assert self.q('~bs θεός', s)
s.response.text = 'לוהים' # Hebrew
assert self.q('~bs לוהים', s)
s.response.text = '神' # Japanese
assert self.q('~bs 神', s)
s.response.text = '하나님' # Korean
assert self.q('~bs 하나님', s)
s.response.text = 'Äÿ' # Latin
assert self.q('~bs Äÿ', s)
assert not self.q("~bs nomatch", s) assert not self.q("~bs nomatch", s)
assert not self.q("~bs content", q) assert not self.q("~bs content", q)
assert not self.q("~bs content", s) assert not self.q("~bs content", s)
assert not self.q("~bs message", q) assert not self.q("~bs message", q)
s.response.text = 'message'
assert self.q("~bs message", s) assert self.q("~bs message", s)
def test_body(self): def test_body(self):