Shift a bunch more string-related functions to strutils

This commit is contained in:
Aldo Cortesi 2016-06-02 13:03:37 +12:00
parent 31012d782f
commit 09da1febbd
9 changed files with 82 additions and 87 deletions

View File

@ -1,7 +1,8 @@
import string import string
import lxml.html import lxml.html
import lxml.etree import lxml.etree
from mitmproxy import utils, contentviews from mitmproxy import contentviews
from netlib import strutils
class ViewPigLatin(contentviews.View): class ViewPigLatin(contentviews.View):
@ -10,7 +11,7 @@ class ViewPigLatin(contentviews.View):
content_types = ["text/html"] content_types = ["text/html"]
def __call__(self, data, **metadata): def __call__(self, data, **metadata):
if utils.isXML(data): if strutils.isXML(data):
parser = lxml.etree.HTMLParser( parser = lxml.etree.HTMLParser(
strip_cdata=True, strip_cdata=True,
remove_blank_text=True remove_blank_text=True

View File

@ -37,7 +37,6 @@ from netlib import http
from netlib import odict from netlib import odict
from netlib.http import url from netlib.http import url
from netlib import strutils from netlib import strutils
import netlib.utils
try: try:
import pyamf import pyamf
@ -130,11 +129,11 @@ class ViewAuto(View):
ct = "%s/%s" % (ct[0], ct[1]) ct = "%s/%s" % (ct[0], ct[1])
if ct in content_types_map: if ct in content_types_map:
return content_types_map[ct][0](data, **metadata) return content_types_map[ct][0](data, **metadata)
elif mitmproxy.utils.isXML(data): elif strutils.isXML(data):
return get("XML")(data, **metadata) return get("XML")(data, **metadata)
if metadata.get("query"): if metadata.get("query"):
return get("Query")(data, **metadata) return get("Query")(data, **metadata)
if data and mitmproxy.utils.isMostlyBin(data): if data and strutils.isMostlyBin(data):
return get("Hex")(data) return get("Hex")(data)
if not data: if not data:
return "No content", [] return "No content", []
@ -157,7 +156,7 @@ class ViewHex(View):
@staticmethod @staticmethod
def _format(data): def _format(data):
for offset, hexa, s in netlib.utils.hexdump(data): for offset, hexa, s in strutils.hexdump(data):
yield [ yield [
("offset", offset + " "), ("offset", offset + " "),
("text", hexa + " "), ("text", hexa + " "),
@ -227,7 +226,7 @@ class ViewHTML(View):
content_types = ["text/html"] content_types = ["text/html"]
def __call__(self, data, **metadata): def __call__(self, data, **metadata):
if mitmproxy.utils.isXML(data): if strutils.isXML(data):
parser = lxml.etree.HTMLParser( parser = lxml.etree.HTMLParser(
strip_cdata=True, strip_cdata=True,
remove_blank_text=True remove_blank_text=True

View File

@ -25,32 +25,6 @@ def format_timestamp_with_milli(s):
return d.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] return d.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
def isBin(s):
    """
    Does this string contain any non-printable (binary) characters?

    A character counts as binary when its code point is below 9, between
    14 and 31 inclusive, or above 126. Code points 9-13 (tab, newline,
    vertical tab, form feed, carriage return) are treated as text.

    Args:
        s: A text string or a byte string.

    Returns:
        True if at least one binary character is present, otherwise False
        (including for empty input).
    """
    for ch in s:
        # Iterating bytes on Python 3 yields ints, which ord() rejects.
        code = ch if isinstance(ch, int) else ord(ch)
        if code < 9 or 13 < code < 32 or code > 126:
            return True
    return False
def isMostlyBin(s):
    """
    Is more than 30% of the first 100 characters of s binary?

    Each character is classified with isBin().

    Args:
        s: A text string or a byte string.

    Returns:
        True if the binary fraction of the inspected prefix exceeds 0.3,
        otherwise False. Empty input is never considered binary.
    """
    s = s[:100]
    if not s:
        # Nothing to inspect; also avoids dividing by zero below.
        return False
    # One-element slices keep the input's type, so isBin() always receives
    # a string rather than the bare ints that bytes iteration yields on
    # Python 3.
    binary = sum(1 for i in range(len(s)) if isBin(s[i:i + 1]))
    # float() forces true division even where "/" would floor two ints.
    return binary / float(len(s)) > 0.3
def isXML(s):
    """
    Does s look like XML/HTML markup?

    Leading newlines, spaces and tabs are skipped; the input is considered
    XML when the first remaining character is "<".

    Args:
        s: A text string or a byte string.

    Returns:
        True if the first non-whitespace character is "<", otherwise False
        (including for empty or whitespace-only input).
    """
    # Use literals of the same type as the input so that both text and
    # byte strings are handled without mixed-type comparisons.
    if isinstance(s, bytes):
        return s.lstrip(b"\n \t")[:1] == b"<"
    return s.lstrip(u"\n \t")[:1] == u"<"
def pretty_json(s): def pretty_json(s):
try: try:
p = json.loads(s) p = json.loads(s)
@ -92,15 +66,3 @@ class LRUCache:
d = self.cacheList.pop() d = self.cacheList.pop()
self.cache.pop(d) self.cache.pop(d)
return ret return ret
def clean_hanging_newline(t):
    """
    Strip a single trailing newline from t, if there is one.

    Many editors will silently add a newline to the final line of a
    document (I'm looking at you, Vim). This function fixes this common
    problem at the risk of removing a hanging newline in the rare cases
    where the user actually intends it.

    Args:
        t: A text string or a byte string; falsy values are returned
           unchanged.

    Returns:
        t without its final newline, or t itself when it does not end
        with one.
    """
    # Indexing bytes on Python 3 yields an int, which never equals "\n",
    # so compare with endswith() against a newline of the matching type.
    newline = b"\n" if isinstance(t, bytes) else u"\n"
    if t and t.endswith(newline):
        return t[:-1]
    return t

View File

@ -101,3 +101,54 @@ def escaped_str_to_bytes(data):
# This one is difficult - we use an undocumented Python API here # This one is difficult - we use an undocumented Python API here
# as per http://stackoverflow.com/a/23151714/934719 # as per http://stackoverflow.com/a/23151714/934719
return codecs.escape_decode(data)[0] return codecs.escape_decode(data)[0]
def isBin(s):
    """
    Does this string contain any non-printable (binary) characters?

    A character counts as binary when its code point is below 9, between
    14 and 31 inclusive, or above 126. Code points 9-13 (tab, newline,
    vertical tab, form feed, carriage return) are treated as text.

    Args:
        s: A text string or a byte string.

    Returns:
        True if at least one binary character is present, otherwise False
        (including for empty input).
    """
    for ch in s:
        # Iterating bytes on Python 3 yields ints, which ord() rejects.
        code = ch if isinstance(ch, int) else ord(ch)
        if code < 9 or 13 < code < 32 or code > 126:
            return True
    return False
def isMostlyBin(s):
    """
    Is more than 30% of the first 100 characters of s binary?

    Each character is classified with isBin().

    Args:
        s: A text string or a byte string.

    Returns:
        True if the binary fraction of the inspected prefix exceeds 0.3,
        otherwise False. Empty input is never considered binary.
    """
    s = s[:100]
    if not s:
        # Nothing to inspect; also avoids dividing by zero below.
        return False
    # One-element slices keep the input's type, so isBin() always receives
    # a string rather than the bare ints that bytes iteration yields on
    # Python 3.
    binary = sum(1 for i in range(len(s)) if isBin(s[i:i + 1]))
    # float() forces true division even where "/" would floor two ints.
    return binary / float(len(s)) > 0.3
def isXML(s):
    """
    Does s look like XML/HTML markup?

    Leading newlines, spaces and tabs are skipped; the input is considered
    XML when the first remaining character is "<".

    Args:
        s: A text string or a byte string.

    Returns:
        True if the first non-whitespace character is "<", otherwise False
        (including for empty or whitespace-only input).
    """
    # Use literals of the same type as the input so that both text and
    # byte strings are handled without mixed-type comparisons.
    if isinstance(s, bytes):
        return s.lstrip(b"\n \t")[:1] == b"<"
    return s.lstrip(u"\n \t")[:1] == u"<"
def clean_hanging_newline(t):
    """
    Strip a single trailing newline from t, if there is one.

    Many editors will silently add a newline to the final line of a
    document (I'm looking at you, Vim). This function fixes this common
    problem at the risk of removing a hanging newline in the rare cases
    where the user actually intends it.

    Args:
        t: A text string or a byte string; falsy values are returned
           unchanged.

    Returns:
        t without its final newline, or t itself when it does not end
        with one.
    """
    # Indexing bytes on Python 3 yields an int, which never equals "\n",
    # so compare with endswith() against a newline of the matching type.
    newline = b"\n" if isinstance(t, bytes) else u"\n"
    if t and t.endswith(newline):
        return t[:-1]
    return t
def hexdump(s):
    """
    Walk s in 16-byte rows and describe each row for display.

    Returns:
        A generator of (offset, hex, str) tuples, where offset is the
        row's start position as ten zero-padded hex digits, hex is the
        space-separated two-digit hex value of every byte padded to a
        fixed width, and str is the row passed through clean_bin().
    """
    row_len = 16
    hex_width = 47  # 16*2 + 15
    for start in range(0, len(s), row_len):
        chunk = s[start:start + row_len]
        offset = "{:0=10x}".format(start).encode()
        pairs = ["{:0=2x}".format(byte).encode() for byte in six.iterbytes(chunk)]
        hex_column = b" ".join(pairs).ljust(hex_width)
        yield (offset, hex_column, clean_bin(chunk, False))

View File

@ -6,21 +6,6 @@ import inspect
import six import six
from netlib import strutils
def hexdump(s):
    """
    Walk s in 16-byte rows and describe each row for display.

    Returns:
        A generator of (offset, hex, str) tuples, where offset is the
        row's start position as ten zero-padded hex digits, hex is the
        space-separated two-digit hex value of every byte padded to a
        fixed width, and str is the row passed through
        strutils.clean_bin().
    """
    row_len = 16
    hex_width = 47  # 16*2 + 15
    for start in range(0, len(s), row_len):
        chunk = s[start:start + row_len]
        offset = "{:0=10x}".format(start).encode()
        pairs = ["{:0=2x}".format(byte).encode() for byte in six.iterbytes(chunk)]
        hex_column = b" ".join(pairs).ljust(hex_width)
        yield (offset, hex_column, strutils.clean_bin(chunk, False))
def setbit(byte, offset, value): def setbit(byte, offset, value):
""" """

View File

@ -2,9 +2,6 @@ import datetime
import six import six
import netlib.utils
import netlib.tcp
import netlib.http
from netlib import strutils from netlib import strutils
TIMEFMT = '%d-%m-%y %H:%M:%S' TIMEFMT = '%d-%m-%y %H:%M:%S'
@ -63,7 +60,7 @@ class LogCtx(object):
def dump(self, data, hexdump): def dump(self, data, hexdump):
if hexdump: if hexdump:
for line in netlib.utils.hexdump(data): for line in strutils.hexdump(data):
self("\t%s %s %s" % line) self("\t%s %s %s" % line)
else: else:
for i in strutils.clean_bin(data).split("\n"): for i in strutils.clean_bin(data).split("\n"):

View File

@ -13,25 +13,6 @@ def test_format_timestamp_with_milli():
assert utils.format_timestamp_with_milli(utils.timestamp()) assert utils.format_timestamp_with_milli(utils.timestamp())
def test_isBin():
    """isBin ignores newline/carriage return but flags control chars and DEL."""
    binary_samples = ("testing\x01", "testing\x0e", "testing\x7f")
    assert not utils.isBin("testing\n\r")
    for sample in binary_samples:
        assert utils.isBin(sample)
def test_isXml():
    """isXML accepts input whose first non-whitespace character is '<'."""
    assert not utils.isXML("foo")
    for markup in ("<foo", " \n<foo"):
        assert utils.isXML(markup)
def test_clean_hanging_newline():
    """A trailing newline is dropped; input without one is unchanged."""
    cases = [("foo\n", "foo"), ("foo", "foo")]
    for given, expected in cases:
        assert utils.clean_hanging_newline(given) == expected
def test_pkg_data(): def test_pkg_data():
assert utils.pkg_data.path("console") assert utils.pkg_data.path("console")
tutils.raises("does not exist", utils.pkg_data.path, "nonexistent") tutils.raises("does not exist", utils.pkg_data.path, "nonexistent")

View File

@ -38,3 +38,26 @@ def test_escaped_str_to_bytes():
assert strutils.escaped_str_to_bytes(u"\\x08") == b"\b" assert strutils.escaped_str_to_bytes(u"\\x08") == b"\b"
assert strutils.escaped_str_to_bytes(u"&!?=\\\\)") == br"&!?=\)" assert strutils.escaped_str_to_bytes(u"&!?=\\\\)") == br"&!?=\)"
assert strutils.escaped_str_to_bytes(u"ü") == b'\xc3\xbc' assert strutils.escaped_str_to_bytes(u"ü") == b'\xc3\xbc'
def test_isBin():
    """isBin ignores newline/carriage return but flags control chars and DEL."""
    binary_samples = ("testing\x01", "testing\x0e", "testing\x7f")
    assert not strutils.isBin("testing\n\r")
    for sample in binary_samples:
        assert strutils.isBin(sample)
def test_isXml():
    """isXML accepts input whose first non-whitespace character is '<'."""
    assert not strutils.isXML("foo")
    for markup in ("<foo", " \n<foo"):
        assert strutils.isXML(markup)
def test_clean_hanging_newline():
    """A trailing newline is dropped; input without one is unchanged."""
    cases = [("foo\n", "foo"), ("foo", "foo")]
    for given, expected in cases:
        assert strutils.clean_hanging_newline(given) == expected
def test_hexdump():
    """Smoke test: hexdump yields at least one row for a 40-byte input."""
    payload = b"one\0" * 10
    rows = list(strutils.hexdump(payload))
    assert rows

View File

@ -10,7 +10,3 @@ def test_bidi():
assert b.get_name(5) is None assert b.get_name(5) is None
tutils.raises(AttributeError, getattr, b, "c") tutils.raises(AttributeError, getattr, b, "c")
tutils.raises(ValueError, utils.BiDi, one=1, two=1) tutils.raises(ValueError, utils.BiDi, one=1, two=1)
def test_hexdump():
    """Smoke test: hexdump yields at least one row for a 40-byte input."""
    payload = b"one\0" * 10
    rows = list(utils.hexdump(payload))
    assert rows