mirror of
https://github.com/Grasscutters/mitmproxy.git
synced 2025-01-30 23:09:44 +00:00
Merge pull request #3464 from rjt-gupta/url-fix
Non ascii characters in url
This commit is contained in:
commit
7d60dde76c
@ -21,16 +21,25 @@ def parse(url):
|
||||
Raises:
|
||||
ValueError, if the URL is not properly formatted.
|
||||
"""
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
# An ASCII character encodes to exactly one byte, so an all-ASCII string has the same length before and after encoding.
|
||||
# A non-ASCII character encodes to more than one byte, so the encoded form is longer than the original string.
|
||||
def ascii_check(l):
    """Return True if ``l`` contains only ASCII characters.

    For text, the check compares the length of ``l`` with the length of its
    encoded form: every ASCII character encodes to a single byte, while any
    other character encodes to several, so the lengths match only when all
    characters are ASCII.

    Bytes are inspected directly. Passing them through ``str()`` would
    produce their repr (``"b'...'"``), whose length never matches, and
    would wrongly report every bytes value as non-ASCII.
    """
    if isinstance(l, (bytes, bytearray)):
        return all(byte < 0x80 for byte in l)
    return len(l) == len(str(l).encode())
|
||||
|
||||
if isinstance(url, bytes):
|
||||
url = url.decode()
|
||||
if not ascii_check(url):
|
||||
url = urllib.parse.urlsplit(url)
|
||||
url = list(url)
|
||||
url[3] = urllib.parse.quote(url[3])
|
||||
url = urllib.parse.urlunsplit(url)
|
||||
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
if not parsed.hostname:
|
||||
raise ValueError("No hostname given")
|
||||
|
||||
if isinstance(url, bytes):
|
||||
host = parsed.hostname
|
||||
|
||||
# this should not raise a ValueError,
|
||||
# but we try to be very forgiving here and accept just everything.
|
||||
else:
|
||||
host = parsed.hostname.encode("idna")
|
||||
if isinstance(parsed, urllib.parse.ParseResult):
|
||||
|
@ -49,6 +49,17 @@ def test_parse():
|
||||
url.parse('http://lo[calhost')
|
||||
|
||||
|
||||
def test_ascii_check():
    """Parse a bytes URL whose query string contains non-ASCII characters.

    The expected path shows the query percent-quoted in the result.
    """
    raw_url = "https://xyz.tax-edu.net?flag=selectCourse&lc_id=42825&lc_name=茅莽莽猫氓猫氓".encode()
    expected_path = (
        b'/?flag%3DselectCourse%26lc_id%3D42825%26lc_name%3D%E8%8C%85%E8%8E%BD%E8%8E'
        b'%BD%E7%8C%AB%E6%B0%93%E7%8C%AB%E6%B0%93'
    )

    parsed_scheme, parsed_host, parsed_port, parsed_path = url.parse(raw_url)

    assert parsed_scheme == b'https'
    assert parsed_host == b'xyz.tax-edu.net'
    assert parsed_port == 443
    assert parsed_path == expected_path
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.version_info < (3, 6), reason='requires Python 3.6 or higher')
|
||||
def test_parse_port_range():
|
||||
# Port out of range
|
||||
@ -61,6 +72,7 @@ def test_unparse():
|
||||
assert url.unparse("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
|
||||
assert url.unparse("https", "foo.com", 80, "") == "https://foo.com:80"
|
||||
assert url.unparse("https", "foo.com", 443, "") == "https://foo.com"
|
||||
assert url.unparse("https", "foo.com", 443, "*") == "https://foo.com"
|
||||
|
||||
|
||||
# We ignore the byte 126: '~' because of an incompatibility in Python 3.6 and 3.7
|
||||
@ -131,3 +143,7 @@ def test_unquote():
|
||||
assert url.unquote("foo") == "foo"
|
||||
assert url.unquote("foo%20bar") == "foo bar"
|
||||
assert url.unquote(surrogates_quoted) == surrogates
|
||||
|
||||
|
||||
def test_hostport():
    """Check the value ``url.hostport`` returns for an https host on port 8080."""
    rendered = url.hostport(b"https", b"foo.com", 8080)
    assert rendered == b"foo.com:8080"
|
||||
|
Loading…
Reference in New Issue
Block a user