From cec8c67465206caa5b4040c5f34050c9ef06b687 Mon Sep 17 00:00:00 2001 From: rjt-gupta Date: Sun, 3 Feb 2019 00:49:53 +0530 Subject: [PATCH 1/2] non ascii fix and tests --- test/mitmproxy/net/http/test_url.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/mitmproxy/net/http/test_url.py b/test/mitmproxy/net/http/test_url.py index ecf8e8969..482778591 100644 --- a/test/mitmproxy/net/http/test_url.py +++ b/test/mitmproxy/net/http/test_url.py @@ -49,6 +49,17 @@ def test_parse(): url.parse('http://lo[calhost') +def test_ascii_check(): + + test_url = "https://xyz.tax-edu.net?flag=selectCourse&lc_id=42825&lc_name=茅莽莽猫氓猫氓".encode() + scheme, host, port, full_path = url.parse(test_url) + assert scheme == b'https' + assert host == b'xyz.tax-edu.net' + assert port == 443 + assert full_path == b'/?flag%3DselectCourse%26lc_id%3D42825%26lc_name%3D%E8%8C%85%E8%8E%BD%E8%8E' \ + b'%BD%E7%8C%AB%E6%B0%93%E7%8C%AB%E6%B0%93' + + @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires Python 3.6 or higher') def test_parse_port_range(): # Port out of range @@ -61,6 +72,7 @@ def test_unparse(): assert url.unparse("http", "foo.com", 80, "/bar") == "http://foo.com/bar" assert url.unparse("https", "foo.com", 80, "") == "https://foo.com:80" assert url.unparse("https", "foo.com", 443, "") == "https://foo.com" + assert url.unparse("https", "foo.com", 443, "*") == "https://foo.com" # We ignore the byte 126: '~' because of an incompatibility in Python 3.6 and 3.7 @@ -131,3 +143,7 @@ def test_unquote(): assert url.unquote("foo") == "foo" assert url.unquote("foo%20bar") == "foo bar" assert url.unquote(surrogates_quoted) == surrogates + + +def test_hostport(): + assert url.hostport(b"https", b"foo.com", 8080) == b"foo.com:8080" From ba054b15f367d59139ec78fe03dc1c7d8fb099b5 Mon Sep 17 00:00:00 2001 From: rjt-gupta Date: Sun, 3 Feb 2019 00:53:05 +0530 Subject: [PATCH 2/2] url-fix --- mitmproxy/net/http/url.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/mitmproxy/net/http/url.py b/mitmproxy/net/http/url.py index f938cb12d..d8e14aeb4 100644 --- a/mitmproxy/net/http/url.py +++ b/mitmproxy/net/http/url.py @@ -21,16 +21,25 @@ def parse(url): Raises: ValueError, if the URL is not properly formatted. """ - parsed = urllib.parse.urlparse(url) + # Size of Ascii character after encoding is 1 byte which is same as its size + # But non-Ascii character's size after encoding will be more than its size + def ascii_check(l): + if len(l) == len(str(l).encode()): + return True + return False + if isinstance(url, bytes): + url = url.decode() + if not ascii_check(url): + url = urllib.parse.urlsplit(url) + url = list(url) + url[3] = urllib.parse.quote(url[3]) + url = urllib.parse.urlunsplit(url) + + parsed = urllib.parse.urlparse(url) if not parsed.hostname: raise ValueError("No hostname given") - if isinstance(url, bytes): - host = parsed.hostname - - # this should not raise a ValueError, - # but we try to be very forgiving here and accept just everything. else: host = parsed.hostname.encode("idna") if isinstance(parsed, urllib.parse.ParseResult):