Further fine-tuning for pretty_xmlish.

This commit is contained in:
Aldo Cortesi 2011-02-06 16:56:13 +13:00
parent 7156d1a73a
commit a4eaafab5a
2 changed files with 12 additions and 9 deletions

View File

@@ -52,13 +52,10 @@ TAG = r"""
(?P<close>\s*\/)? (?P<close>\s*\/)?
(?P<name>\w+) (?P<name>\w+)
( (
[a-zA-Z0-9_#:=().%\/]+ [^'"\t >]+ |
| "[^\"]*"['\"]* |
"[^\"]*"['\"]* '[^']*'['\"]* |
| \s+
'[^']*'['\"]*
|
\s+
)* )*
(?P<selfcont>\s*\/\s*)? (?P<selfcont>\s*\/\s*)?
\s*> \s*>
@@ -67,7 +64,7 @@ UNI = set(["br", "hr", "img", "input", "area", "link"])
INDENT = " "*4 INDENT = " "*4
def pretty_xmlish(s): def pretty_xmlish(s):
""" """
This is a robust, general pretty-printer for XML-ish data. A robust pretty-printer for XML-ish data.
Returns a list of lines. Returns a list of lines.
""" """
data, offset, indent, prev = [], 0, 0, None data, offset, indent, prev = [], 0, 0, None
@@ -87,6 +84,9 @@ def pretty_xmlish(s):
if not any([i.group("close"), i.group("selfcont"), name in UNI]): if not any([i.group("close"), i.group("selfcont"), name in UNI]):
indent += 1 indent += 1
prev = name prev = name
trail = s[offset:]
if trail.strip():
data.append(s[offset:])
return data return data

View File

@@ -242,6 +242,9 @@ class upretty_xmlish(libpry.AutoTree):
assert f(r"<foo a='b\"'>") assert f(r"<foo a='b\"'>")
assert f(r'<a b=(a.b) href="foo">') assert f(r'<a b=(a.b) href="foo">')
assert f('<td width=25%>') assert f('<td width=25%>')
assert f('<form name="search" action="/search.php" method="get" accept-charset="utf-8" class="search">')
assert f('<img src="gif" width="125" height="16" alt=&quot;&quot; />')
def test_all(self): def test_all(self):
def isbalanced(ret): def isbalanced(ret):
@@ -278,7 +281,7 @@ class upretty_xmlish(libpry.AutoTree):
isbalanced(ret) isbalanced(ret)
s = "gobbledygook" s = "gobbledygook"
print utils.pretty_xmlish(s) assert utils.pretty_xmlish(s) == ["gobbledygook"]