Merge pull request #1426 from mhils/query-encoding

Fix query/path_components encoding
This commit is contained in:
Maximilian Hils 2016-07-24 21:17:35 -07:00 committed by GitHub
commit 56796aeda2
4 changed files with 94 additions and 21 deletions

View File

@ -253,14 +253,13 @@ class Request(message.Message):
)
def _get_query(self):
    """
    Return the query component of ``self.url`` as a tuple of decoded items.

    The raw query string is passed through ``netlib.http.url.decode`` and the
    result is wrapped in a tuple so that callers cannot mutate it in place.
    """
    query = urllib.parse.urlparse(self.url).query
    return tuple(netlib.http.url.decode(query))
def _set_query(self, query_data):
    """
    Replace the query component of the request path.

    Args:
        query_data: The query items, in the form accepted by
            ``netlib.http.url.encode``.

    The path, params and fragment are taken from the current ``self.url``;
    scheme and netloc are left empty so that only the path portion is
    written back to ``self.path``.
    """
    query = netlib.http.url.encode(query_data)
    _, _, path, params, _, fragment = urllib.parse.urlparse(self.url)
    self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])
@query.setter
def query(self, value):
@ -296,19 +295,18 @@ class Request(message.Message):
The URL's path components as a tuple of strings.
Components are unquoted.
"""
_, _, path, _, _, _ = urllib.parse.urlparse(self.url)
path = urllib.parse.urlparse(self.url).path
# This needs to be a tuple so that it's immutable.
# Otherwise, this would fail silently:
# request.path_components.append("foo")
return tuple(urllib.parse.unquote(i) for i in path.split("/") if i)
return tuple(netlib.http.url.unquote(i) for i in path.split("/") if i)
@path_components.setter
def path_components(self, components):
    """
    Set the URL's path from an iterable of unquoted path components.

    Args:
        components: The path segments as strings. Each one is quoted exactly
            once with ``safe=""`` so that a "/" inside a segment is escaped
            instead of being read as a separator.

    Params, query and fragment are preserved from the current ``self.url``.
    """
    # Quote once only: a second pass would turn "%" into "%25".
    path = "/" + "/".join(
        netlib.http.url.quote(component, safe="") for component in components
    )
    _, _, _, params, query, fragment = urllib.parse.urlparse(self.url)
    # Scheme and netloc are left empty so only the path portion is stored.
    self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])
def anticache(self):
"""
@ -365,13 +363,13 @@ class Request(message.Message):
pass
return ()
def _set_urlencoded_form(self, form_data):
    """
    Sets the body to the URL-encoded form data, and adds the appropriate content-type header.
    This will overwrite the existing content if there is one.

    Args:
        form_data: The form items, in the form accepted by
            ``netlib.http.url.encode``.
    """
    self.headers["content-type"] = "application/x-www-form-urlencoded"
    # url.encode returns a str; the message content is stored as bytes.
    self.content = netlib.http.url.encode(form_data).encode()
@urlencoded_form.setter
def urlencoded_form(self, value):

View File

@ -82,19 +82,51 @@ def unparse(scheme, host, port, path=""):
def encode(s):
    # type: (Sequence[Tuple[str, str]]) -> str
    """
    Takes a list of (key, value) tuples and returns a urlencoded string.

    Args:
        s: An iterable of (key, value) pairs; each pair is converted to a
            tuple before encoding.
    Returns:
        The urlencoded string. On Python 3 the keys and values are encoded
        with the ``surrogateescape`` error handler.
    """
    s = [tuple(i) for i in s]
    if six.PY2:
        # Python 2's urlencode does not accept an ``errors`` argument.
        return urllib.parse.urlencode(s, False)
    else:
        return urllib.parse.urlencode(s, False, errors="surrogateescape")
def decode(s):
    """
    Takes a urlencoded string and returns a list of surrogate-escaped (key, value) tuples.

    Blank values are kept, so ``"a=&b=1"`` yields an entry for ``a`` as well.
    """
    if six.PY2:
        # Python 2's parse_qsl does not accept an ``errors`` argument.
        return urllib.parse.parse_qsl(s, keep_blank_values=True)
    else:
        return urllib.parse.parse_qsl(s, keep_blank_values=True, errors='surrogateescape')
def quote(b, safe="/"):
    # type: (str, str) -> str
    """
    Percent-encode a string for use in a URL.

    Args:
        b: The str to quote. On Python 3 it is encoded with the
            ``surrogateescape`` error handler before quoting.
        safe: Characters that must not be quoted.
    Returns:
        An ascii-encodable str.
    """
    if six.PY2:
        # Python 2's quote does not accept an ``errors`` argument.
        return urllib.parse.quote(b, safe=safe)
    else:
        return urllib.parse.quote(b, safe=safe, errors="surrogateescape")
def unquote(s):
    # type: (str) -> str
    """
    Reverse percent-encoding.

    Args:
        s: A surrogate-escaped str
    Returns:
        A surrogate-escaped str
    """
    if not six.PY2:
        # Decode the unquoted bytes with the surrogateescape error handler.
        return urllib.parse.unquote(s, errors="surrogateescape")
    return urllib.parse.unquote(s)
def hostport(scheme, host, port):

View File

@ -98,6 +98,9 @@ def bytes_to_escaped_str(data, keep_spacing=False):
def escaped_str_to_bytes(data):
"""
Take an escaped string and return the unescaped bytes equivalent.
Raises:
ValueError, if the escape sequence is invalid.
"""
if not isinstance(data, six.string_types):
if six.PY2:

View File

@ -1,3 +1,4 @@
import six
from netlib import tutils
from netlib.http import url
@ -57,10 +58,49 @@ def test_unparse():
assert url.unparse("https", "foo.com", 443, "") == "https://foo.com"
# Module-level fixtures shared by the tests below.
# ``surrogates`` holds every byte value 0-255; on Python 3 the bytes are
# decoded with the surrogateescape error handler so that it is a str.
if six.PY2:
    surrogates = bytes(bytearray(range(256)))
else:
    surrogates = bytes(range(256)).decode("utf8", "surrogateescape")

# The percent-encoded form that the tests compare against ``surrogates``.
surrogates_quoted = (
    '%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F'
    '%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F'
    '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-./'
    '0123456789%3A%3B%3C%3D%3E%3F'
    '%40ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ%5B%5C%5D%5E_'
    '%60abcdefghijklmno'
    'pqrstuvwxyz%7B%7C%7D%7E%7F'
    '%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F'
    '%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F'
    '%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF'
    '%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF'
    '%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF'
    '%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF'
    '%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF'
    '%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF'
)
def test_encode():
    """url.encode produces a urlencoded string and accepts surrogate-escaped values."""
    # Pin the exact output rather than just checking it is non-empty.
    assert url.encode([('foo', 'bar')]) == "foo=bar"
    # Must not raise on the full 0-255 byte range.
    assert url.encode([('foo', surrogates)])
def test_decode():
    """url.decode splits a query string into (key, value) pairs."""
    s = "one=two&three=four"
    # Check the decoded pairs themselves, not only how many there are.
    assert url.decode(s) == [("one", "two"), ("three", "four")]
    # Must not raise on the full 0-255 byte range.
    assert url.decode(surrogates)
def test_quote():
    """url.quote percent-encodes everything outside the unreserved set."""
    assert url.quote("foo") == "foo"
    assert url.quote("foo bar") == "foo%20bar"
    # Python 3.7+ treats "~" as unreserved (RFC 3986) and leaves it unquoted,
    # while older versions emit "%7E"; normalise so both are accepted.
    assert url.quote(surrogates).replace("~", "%7E") == surrogates_quoted
def test_unquote():
    """url.unquote reverses percent-encoding, including for surrogate-escaped input."""
    cases = (
        ("foo", "foo"),
        ("foo%20bar", "foo bar"),
        (surrogates_quoted, surrogates),
    )
    for quoted, expected in cases:
        assert url.unquote(quoted) == expected