mirror of
https://github.com/Grasscutters/mitmproxy.git
synced 2024-11-23 08:11:00 +00:00
1ffc273c94
- Move more stuff that belongs in netlib.human - Move some stuff to near the only use - Zap mitmproxy.utils.timestamp(). I see the rationale, but we used it interchangeably with time.time() throughout the project. Since time.time() dominates in the codebase and timestamp() is such low utility, away it goes.
110 lines
2.8 KiB
Python
110 lines
2.8 KiB
Python
import six
|
|
from six.moves import urllib
|
|
|
|
from netlib import utils
|
|
|
|
|
|
# PY2 workaround
|
|
def decode_parse_result(result, enc):
    """
        Decode a URL parse result using the encoding ``enc``.

        Objects that provide their own ``decode`` method are decoded as a
        whole. Anything else is treated as an iterable of components: each
        one is decoded individually and the pieces are reassembled into a
        ``urllib.parse.ParseResult``.
    """
    if not hasattr(result, "decode"):
        # No bulk decode available: decode component by component.
        decoded_fields = [field.decode(enc) for field in result]
        return urllib.parse.ParseResult(*decoded_fields)
    return result.decode(enc)
|
|
|
|
|
|
# PY2 workaround
|
|
def encode_parse_result(result, enc):
    """
        Encode a URL parse result using the encoding ``enc``.

        Objects that provide their own ``encode`` method are encoded as a
        whole. Anything else is treated as an iterable of components: each
        one is encoded individually and the pieces are reassembled into a
        ``urllib.parse.ParseResult``.
    """
    if not hasattr(result, "encode"):
        # No bulk encode available: encode component by component.
        encoded_fields = [field.encode(enc) for field in result]
        return urllib.parse.ParseResult(*encoded_fields)
    return result.encode(enc)
|
|
|
|
|
|
def parse(url):
    """
        Split a URL into scheme, host, port and path, validating host and port.

        Text input has its hostname IDNA-encoded and its remaining components
        ASCII-encoded; byte input is used as parsed, without an ASCII check.
        The returned path is the URL's path, params, query and fragment joined
        back together, and always starts with "/".

        Args:
            url: The URL to parse (as bytes or as unicode).

        Returns:
            A (scheme, host, port, path) tuple. When the URL carries no port
            (or a falsy one), 443 is used for the "https" scheme and 80
            otherwise.

        Raises:
            ValueError, if the URL has no hostname, or if the host or port is
            rejected by utils.is_valid_host / utils.is_valid_port.
    """
    components = urllib.parse.urlparse(url)

    raw_host = components.hostname
    if not raw_host:
        raise ValueError("No hostname given")

    if not isinstance(url, six.binary_type):
        host = raw_host.encode("idna")
        components = encode_parse_result(components, "ascii")
    else:
        # Byte input is deliberately taken as-is: we want to be as forgiving
        # as possible, so no ASCII check is applied to the parsed components.
        host = raw_host

    # NOTE: any falsy port (missing, but also an explicit 0) falls back to
    # the scheme's default port.
    port = components.port or (443 if components.scheme == b"https" else 80)

    # Rebuild everything after the authority into a single request path.
    remainder = (
        b"",
        b"",
        components.path,
        components.params,
        components.query,
        components.fragment,
    )
    full_path = urllib.parse.urlunparse(remainder)
    if not full_path.startswith(b"/"):
        full_path = b"/" + full_path

    if not utils.is_valid_host(host):
        raise ValueError("Invalid Host")
    if not utils.is_valid_port(port):
        raise ValueError("Invalid Port")

    return components.scheme, host, port, full_path
|
|
|
|
|
|
def unparse(scheme, host, port, path=""):
    """
        Build a URL string of the form "scheme://host[:port]path".

        The port is only included when hostport() decides it is needed, and
        a path of "*" is rendered as an empty path.

        Args:
            All args must be str.
    """
    rendered_path = "" if path == "*" else path
    authority = hostport(scheme, host, port)
    return "%s://%s%s" % (scheme, authority, rendered_path)
|
|
|
|
|
|
def encode(s):
    """
        Urlencode an iterable of (key, value) pairs into a query string.

        Each pair is converted to a tuple first, so list-style pairs such as
        ["key", "value"] are accepted as well.
    """
    pairs = list(map(tuple, s))
    return urllib.parse.urlencode(pairs, doseq=False)
|
|
|
|
|
|
def decode(s):
    """
        Parse a urlencoded string into a list of (key, value) tuples.

        Keys that have an empty value are kept rather than dropped.
    """
    pairs = urllib.parse.parse_qsl(s, keep_blank_values=True)
    return pairs
|
|
|
|
|
|
def hostport(scheme, host, port):
    """
        Returns the host component, with a port specification if needed.

        The port is left out when it is the default for the scheme (80 for
        http, 443 for https; the scheme may be given as str or bytes).
        Otherwise "host:port" is returned, as bytes if the host is bytes and
        as str if not.
    """
    default_pairs = [
        (80, "http"),
        (443, "https"),
        (80, b"http"),
        (443, b"https"),
    ]
    if (port, scheme) in default_pairs:
        return host

    # Pick the template matching the host's type so the result keeps it.
    template = b"%s:%d" if isinstance(host, bytes) else "%s:%d"
    return template % (host, port)
|