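Further fine-tuning for pretty_xmlish: simplify the attribute alternatives in the TAG pattern and append any non-blank leftover input (s[offset:]) to the returned lines.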

This commit is contained in:
Aldo Cortesi 2011-02-06 16:56:13 +13:00
parent 7156d1a73a
commit a4eaafab5a
2 changed files with 12 additions and 9 deletions

View File

@ -52,12 +52,9 @@ TAG = r"""
(?P<close>\s*\/)?
(?P<name>\w+)
(
[a-zA-Z0-9_#:=().%\/]+
|
"[^\"]*"['\"]*
|
'[^']*'['\"]*
|
[^'"\t >]+ |
"[^\"]*"['\"]* |
'[^']*'['\"]* |
\s+
)*
(?P<selfcont>\s*\/\s*)?
@ -67,7 +64,7 @@ UNI = set(["br", "hr", "img", "input", "area", "link"])
INDENT = " "*4
def pretty_xmlish(s):
"""
This is a robust, general pretty-printer for XML-ish data.
A robust pretty-printer for XML-ish data.
Returns a list of lines.
"""
data, offset, indent, prev = [], 0, 0, None
@ -87,6 +84,9 @@ def pretty_xmlish(s):
if not any([i.group("close"), i.group("selfcont"), name in UNI]):
indent += 1
prev = name
trail = s[offset:]
if trail.strip():
data.append(s[offset:])
return data

View File

@ -242,6 +242,9 @@ class upretty_xmlish(libpry.AutoTree):
assert f(r"<foo a='b\"'>")
assert f(r'<a b=(a.b) href="foo">')
assert f('<td width=25%>')
assert f('<form name="search" action="/search.php" method="get" accept-charset="utf-8" class="search">')
assert f('<img src="gif" width="125" height="16" alt=&quot;&quot; />')
def test_all(self):
def isbalanced(ret):
@ -278,7 +281,7 @@ class upretty_xmlish(libpry.AutoTree):
isbalanced(ret)
s = "gobbledygook"
print utils.pretty_xmlish(s)
assert utils.pretty_xmlish(s) == ["gobbledygook"]