Rip out BeautifulSoup, and use a custom XML-ish prettyprinter.
parent 44dc3a052e
commit 7156d1a73a
@@ -307,7 +307,7 @@ class ConnectionView(WWrap):
            ])

    def _view_pretty(self, conn, txt):
-        for i in utils.prettybody(conn.content):
+        for i in utils.pretty_xmlish(conn.content):
            txt.append(
                ("text", i),
            )
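Note: the ("text", i) tuples built in _view_pretty are the (attribute, string) markup pairs used by urwid, which the mitmproxy console view is built on. A minimal rendering sketch, not taken from this commit: the attribute name "text" comes from the diff above, and the sample lines are invented.

    # Sketch only: render ("text", line) markup pairs with urwid.
    import urwid

    lines = ["<html>", "    <p>", "        one", "    </p>", "</html>"]   # invented sample
    txt = []
    for i in lines:
        txt.append(("text", i + "\n"))    # (attribute, string) markup tuple, as in _view_pretty

    w = urwid.Text(txt)                   # urwid.Text accepts a list of markup tuples
    print w.text                          # the concatenated plain text, attributes stripped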
File diff suppressed because it is too large
@@ -12,9 +12,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import re, os, subprocess, datetime
-from contrib import BeautifulSoup
+import re, os, subprocess, datetime, textwrap


 def format_timestamp(s):
@@ -48,14 +46,48 @@ def cleanBin(s):
     return "".join(parts)


-def prettybody(s):
+TAG = r"""
+    <\s*
+    (?!\s*[!"])
+    (?P<close>\s*\/)?
+    (?P<name>\w+)
+    (
+        [a-zA-Z0-9_#:=().%\/]+
+        |
+        "[^\"]*"['\"]*
+        |
+        '[^']*'['\"]*
+        |
+        \s+
+    )*
+    (?P<selfcont>\s*\/\s*)?
+    \s*>
+"""
+UNI = set(["br", "hr", "img", "input", "area", "link"])
+INDENT = " "*4
+def pretty_xmlish(s):
     """
-        Return a list of pretty-printed lines.
+        This is a robust, general pretty-printer for XML-ish data.
+        Returns a list of lines.
     """
-    s = BeautifulSoup.BeautifulStoneSoup(s)
-    s = s.prettify().strip()
-    parts = s.split("\n")
-    return [repr(i)[1:-1] for i in parts]
+    data, offset, indent, prev = [], 0, 0, None
+    for i in re.finditer(TAG, s, re.VERBOSE|re.MULTILINE):
+        start, end = i.span()
+        name = i.group("name")
+        if start > offset:
+            txt = []
+            for x in textwrap.dedent(s[offset:start]).split("\n"):
+                if x.strip():
+                    txt.append(indent*INDENT + x)
+            data.extend(txt)
+        if i.group("close") and not (name in UNI and name==prev):
+            indent = max(indent - 1, 0)
+        data.append(indent*INDENT + i.group().strip())
+        offset = end
+        if not any([i.group("close"), i.group("selfcont"), name in UNI]):
+            indent += 1
+        prev = name
+    return data


 def hexdump(s):
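For reference, a small usage sketch of the new pretty-printer. It assumes the libmproxy.utils module from this commit is importable; the input mirrors the "<br/>" case in the test suite below, and the expected output was traced by hand from the code above.

    # Usage sketch; assumes this commit's libmproxy.utils is on the path.
    from libmproxy import utils

    for line in utils.pretty_xmlish("<html><br/><p>one</p></html>"):
        print line

    # Expected output (6 lines, the last one unindented):
    # <html>
    #     <br/>
    #     <p>
    #         one
    #     </p>
    # </html>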
@@ -1,4 +1,4 @@
-import textwrap, cStringIO, os, time
+import textwrap, cStringIO, os, time, re
 import libpry
 from libmproxy import utils

@@ -228,13 +228,59 @@ class umake_bogus_cert(libpry.AutoTree):
         assert "CERTIFICATE" in d


-class uprettybody(libpry.AutoTree):
-    def test_all(self):
-        s = "<html><p></p></html>"
-        assert utils.prettybody(s)
+class upretty_xmlish(libpry.AutoTree):
+    def test_tagre(self):
+        def f(s):
+            return re.search(utils.TAG, s, re.VERBOSE|re.MULTILINE)
+        assert f(r"<body>")
+        assert f(r"<body/>")
+        assert f(r"< body/>")
+        assert f(r"< body/ >")
+        assert f(r"< body / >")
+        assert f(r"<foo a=b>")
+        assert f(r"<foo a='b'>")
+        assert f(r"<foo a='b\"'>")
+        assert f(r'<a b=(a.b) href="foo">')
+        assert f('<td width=25%>')
+
+    def test_all(self):
+        def isbalanced(ret):
+            # The last tag should have no indent
+            assert ret[-1].strip() == ret[-1]
+
+        s = "<html><br><br></br><p>one</p></html>"
+        ret = utils.pretty_xmlish(s)
+        isbalanced(ret)
+
+        s = r"""
+            <body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="document.f.q.focus();if(document.images)new Image().src='/images/srpr/nav_logo27.png'" ><textarea id=csi style=display:none></textarea></body>
+        """
+        isbalanced(utils.pretty_xmlish(textwrap.dedent(s)))
+
+        s = r"""
+            <a href="http://foo.com" target="">
+                <img src="http://foo.gif" alt="bar" height="25" width="132">
+            </a>
+        """
+        isbalanced(utils.pretty_xmlish(textwrap.dedent(s)))
+
+        s = r"""
+            <!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"
+            \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">
+            <html></html>
+        """
+        ret = utils.pretty_xmlish(textwrap.dedent(s))
+        isbalanced(ret)
+
+        s = "<html><br/><p>one</p></html>"
+        ret = utils.pretty_xmlish(s)
+        assert len(ret) == 6
+        isbalanced(ret)
+
+        s = "gobbledygook"
+        print utils.pretty_xmlish(s)


-        s = "".join([chr(i) for i in range(256)])
-        assert utils.prettybody(s)


@@ -249,5 +295,5 @@ tests = [
     uMultiDict(),
     uHeaders(),
     uData(),
-    uprettybody(),
+    upretty_xmlish(),
 ]
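A side note on the "gobbledygook" case above: in pretty_xmlish, text is only emitted when it sits between the current offset and the next tag match, so input with no tags at all simply yields an empty list rather than an error. A quick check, under the same import assumption as the earlier sketch:

    # No-tag input never enters the tag loop; assumes this commit's
    # libmproxy.utils is importable.
    from libmproxy import utils

    print utils.pretty_xmlish("gobbledygook")   # -> []  (no tags, nothing emitted)
    print utils.pretty_xmlish("<p>hi</p>")      # -> ['<p>', '    hi', '</p>']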
todo
@@ -1,15 +1,11 @@
Futures:

- Timestamps
- Strings view for binary responses.
- Post and URL field parsing and editing.
- On-the-fly generation of keys, signed with a CA
- Pass-through fast-track for things that don't match filter?
- Reading contents from file
- Shortcut for viewing in pager
- Serializing and de-serializing requests and responses.
- Upstream proxies.
- mitmdump
- Filters
@@ -17,7 +13,9 @@ Futures:
- Pipe to script
- Command-line replay or serialized flows


Bugs:

- In some circumstances, long URLs in list view are line-broken oddly.
- Termination sometimes hangs.
- When a bug in mitmproxy causes a stack trace, we hang on exit.