mirror of
https://github.com/Grasscutters/mitmproxy.git
synced 2024-11-26 18:18:25 +00:00
Extract url functions from netlib.utils and move to netlib.http.url
This commit is contained in:
parent
08fbe6f111
commit
4e6c9c4e93
@ -6,7 +6,7 @@ import base64
|
||||
import configargparse
|
||||
|
||||
from netlib.tcp import Address, sslversion_choices
|
||||
import netlib.utils
|
||||
import netlib.http.url
|
||||
from . import filt, utils, version
|
||||
from .proxy import config
|
||||
|
||||
@ -105,7 +105,7 @@ def parse_setheader(s):
|
||||
|
||||
def parse_server_spec(url):
|
||||
try:
|
||||
p = netlib.utils.parse_url(url)
|
||||
p = netlib.http.url.parse_url(url)
|
||||
if p[0] not in ("http", "https"):
|
||||
raise ValueError()
|
||||
except ValueError:
|
||||
|
@ -1,7 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
import urwid
|
||||
|
||||
import netlib.utils
|
||||
import netlib.http.url
|
||||
|
||||
from . import common, signals
|
||||
|
||||
@ -343,7 +343,7 @@ class FlowListBox(urwid.ListBox):
|
||||
)
|
||||
|
||||
def new_request(self, url, method):
|
||||
parts = netlib.utils.parse_url(str(url))
|
||||
parts = netlib.http.url.parse_url(str(url))
|
||||
if not parts:
|
||||
signals.status_message.send(message="Invalid Url")
|
||||
return
|
||||
|
@ -27,7 +27,8 @@ import html2text
|
||||
import six
|
||||
from netlib.odict import ODict
|
||||
from netlib import encoding
|
||||
from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type
|
||||
from netlib.http import url
|
||||
from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type
|
||||
from . import utils
|
||||
from .exceptions import ContentViewException
|
||||
from .contrib import jsbeautifier
|
||||
@ -257,7 +258,7 @@ class ViewURLEncoded(View):
|
||||
content_types = ["application/x-www-form-urlencoded"]
|
||||
|
||||
def __call__(self, data, **metadata):
|
||||
d = urldecode(data)
|
||||
d = url.urldecode(data)
|
||||
return "URLEncoded form", format_dict(ODict(d))
|
||||
|
||||
|
||||
|
@ -14,8 +14,8 @@ from hyperframe.frame import PriorityFrame
|
||||
from netlib.tcp import ssl_read_select
|
||||
from netlib.exceptions import HttpException
|
||||
from netlib.http import Headers
|
||||
from netlib.utils import parse_url
|
||||
from netlib.http.http2 import frame
|
||||
import netlib.http.url
|
||||
|
||||
from .base import Layer
|
||||
from .http import _HttpTransmissionLayer, HttpLayer
|
||||
@ -320,7 +320,7 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread):
|
||||
else: # pragma: no cover
|
||||
first_line_format = "absolute"
|
||||
# FIXME: verify if path or :host contains what we need
|
||||
scheme, host, port, _ = parse_url(path)
|
||||
scheme, host, port, _ = netlib.http.url.parse_url(path)
|
||||
|
||||
if authority:
|
||||
host, _, port = authority.partition(':')
|
||||
|
@ -6,6 +6,7 @@ import re
|
||||
from ... import utils
|
||||
from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect
|
||||
from .. import Request, Response, Headers
|
||||
from .. import url
|
||||
|
||||
|
||||
def read_request(rfile, body_size_limit=None):
|
||||
@ -240,7 +241,7 @@ def _read_request_line(rfile):
|
||||
scheme, path = None, None
|
||||
else:
|
||||
form = "absolute"
|
||||
scheme, host, port, path = utils.parse_url(path)
|
||||
scheme, host, port, path = url.parse_url(path)
|
||||
|
||||
_check_http_version(http_version)
|
||||
except ValueError:
|
||||
|
@ -6,7 +6,7 @@ import hyperframe.frame
|
||||
|
||||
from hpack.hpack import Encoder, Decoder
|
||||
from ... import utils
|
||||
from .. import Headers, Response, Request
|
||||
from .. import Headers, Response, Request, url
|
||||
from . import frame
|
||||
|
||||
|
||||
@ -118,7 +118,7 @@ class HTTP2Protocol(object):
|
||||
else:
|
||||
first_line_format = "absolute"
|
||||
# FIXME: verify if path or :host contains what we need
|
||||
scheme, host, port, _ = utils.parse_url(path)
|
||||
scheme, host, port, _ = url.parse_url(path)
|
||||
scheme = scheme.decode('ascii')
|
||||
host = host.decode('ascii')
|
||||
|
||||
|
@ -6,6 +6,7 @@ import six
|
||||
from six.moves import urllib
|
||||
|
||||
from netlib import utils
|
||||
import netlib.http.url
|
||||
from . import cookies
|
||||
from .. import encoding
|
||||
from ..multidict import MultiDictView
|
||||
@ -179,11 +180,11 @@ class Request(Message):
|
||||
"""
|
||||
if self.first_line_format == "authority":
|
||||
return "%s:%d" % (self.host, self.port)
|
||||
return utils.unparse_url(self.scheme, self.host, self.port, self.path)
|
||||
return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path)
|
||||
|
||||
@url.setter
|
||||
def url(self, url):
|
||||
self.scheme, self.host, self.port, self.path = utils.parse_url(url)
|
||||
self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url)
|
||||
|
||||
def _parse_host_header(self):
|
||||
"""Extract the host and port from Host header"""
|
||||
@ -219,7 +220,7 @@ class Request(Message):
|
||||
"""
|
||||
if self.first_line_format == "authority":
|
||||
return "%s:%d" % (self.pretty_host, self.port)
|
||||
return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
|
||||
return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
|
||||
|
||||
@property
|
||||
def query(self):
|
||||
@ -234,12 +235,12 @@ class Request(Message):
|
||||
|
||||
def _get_query(self):
|
||||
_, _, _, _, query, _ = urllib.parse.urlparse(self.url)
|
||||
return tuple(utils.urldecode(query))
|
||||
return tuple(netlib.http.url.urldecode(query))
|
||||
|
||||
def _set_query(self, value):
|
||||
query = utils.urlencode(value)
|
||||
query = netlib.http.url.urlencode(value)
|
||||
scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
|
||||
_, _, _, self.path = utils.parse_url(
|
||||
_, _, _, self.path = netlib.http.url.parse_url(
|
||||
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
|
||||
|
||||
@query.setter
|
||||
@ -287,7 +288,7 @@ class Request(Message):
|
||||
components = map(lambda x: urllib.parse.quote(x, safe=""), components)
|
||||
path = "/" + "/".join(components)
|
||||
scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
|
||||
_, _, _, self.path = utils.parse_url(
|
||||
_, _, _, self.path = netlib.http.url.parse_url(
|
||||
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
|
||||
|
||||
def anticache(self):
|
||||
@ -339,7 +340,7 @@ class Request(Message):
|
||||
def _get_urlencoded_form(self):
|
||||
is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
|
||||
if is_valid_content_type:
|
||||
return tuple(utils.urldecode(self.content))
|
||||
return tuple(netlib.http.url.urldecode(self.content))
|
||||
return ()
|
||||
|
||||
def _set_urlencoded_form(self, value):
|
||||
@ -348,7 +349,7 @@ class Request(Message):
|
||||
This will overwrite the existing content if there is one.
|
||||
"""
|
||||
self.headers["content-type"] = "application/x-www-form-urlencoded"
|
||||
self.content = utils.urlencode(value)
|
||||
self.content = netlib.http.url.urlencode(value)
|
||||
|
||||
@urlencoded_form.setter
|
||||
def urlencoded_form(self, value):
|
||||
|
95
netlib/http/url.py
Normal file
95
netlib/http/url.py
Normal file
@ -0,0 +1,95 @@
|
||||
import six
|
||||
from six.moves import urllib
|
||||
|
||||
from .. import utils
|
||||
|
||||
# PY2 workaround
|
||||
def decode_parse_result(result, enc):
    """
    Decode every component of a URL parse result from bytes to text.

    Args:
        result: A parse result whose components are byte strings.
        enc: Name of the codec used for decoding, e.g. "ascii".

    Returns:
        result.decode(enc) if the result object offers a decode() method,
        otherwise a new ParseResult built from the decoded components.
    """
    if hasattr(result, "decode"):
        return result.decode(enc)
    # PY2 workaround: no decode() on the result itself, so decode each
    # component and rebuild the ParseResult by hand.
    return urllib.parse.ParseResult(*[part.decode(enc) for part in result])
|
||||
|
||||
|
||||
# PY2 workaround
|
||||
def encode_parse_result(result, enc):
    """
    Encode every component of a URL parse result from text to bytes.

    Args:
        result: A parse result whose components are text strings.
        enc: Name of the codec used for encoding, e.g. "ascii".

    Returns:
        result.encode(enc) if the result object offers an encode() method,
        otherwise a new ParseResult built from the encoded components.
    """
    if hasattr(result, "encode"):
        return result.encode(enc)
    # PY2 workaround: no encode() on the result itself, so encode each
    # component and rebuild the ParseResult by hand.
    return urllib.parse.ParseResult(*[part.encode(enc) for part in result])
|
||||
|
||||
|
||||
def parse_url(url):
    """
    URL-parsing function that checks that
        - port is an integer 0-65535
        - host is a valid IDNA-encoded hostname with no null-bytes
        - path is valid ASCII

    A text URL has its hostname IDNA-encoded and its remaining components
    ASCII-encoded. A bytes URL is taken as-is without an ASCII check.

    Args:
        url: A URL (as bytes or as unicode)

    Returns:
        A (scheme, host, port, path) tuple. scheme, host and path are byte
        strings and port is an integer. A missing (or zero) port is replaced
        by 443 for the "https" scheme and by 80 otherwise. path also carries
        the params, query and fragment, and always starts with "/".

    Raises:
        ValueError, if the URL is not properly formatted.
    """
    parsed = urllib.parse.urlparse(url)

    if not parsed.hostname:
        raise ValueError("No hostname given")

    if isinstance(url, six.binary_type):
        # Decoding the components as ASCII would validate them, but we try
        # to be very forgiving here and accept just everything.
        host = parsed.hostname
    else:
        # Text that cannot be encoded raises UnicodeError, which is a
        # ValueError subclass and therefore matches the documented contract.
        host = parsed.hostname.encode("idna")
        parsed = encode_parse_result(parsed, "ascii")

    port = parsed.port
    if not port:
        port = 443 if parsed.scheme == b"https" else 80

    # Re-assemble everything after the authority into a single path string.
    full_path = urllib.parse.urlunparse(
        (b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
    )
    if not full_path.startswith(b"/"):
        full_path = b"/" + full_path

    if not utils.is_valid_host(host):
        raise ValueError("Invalid Host")
    if not utils.is_valid_port(port):
        raise ValueError("Invalid Port")

    return parsed.scheme, host, port, full_path
|
||||
|
||||
|
||||
def unparse_url(scheme, host, port, path=""):
    """
    Returns a URL string, constructed from the specified components.

    Args:
        scheme: URL scheme as str, e.g. "http".
        host: Host name as str.
        port: Port number as int.
        path: Path as str. The special path "*" is rendered as an empty
            path.
    """
    if path == "*":
        path = ""
    authority = utils.hostport(scheme, host, port)
    return "%s://%s%s" % (scheme, authority, path)
|
||||
|
||||
|
||||
def urlencode(s):
    """
    Takes a list of (key, value) tuples and returns a urlencoded string.

    Each item is converted to a tuple first, so any two-element sequence
    (for example a list) is accepted as a pair.
    """
    pairs = [tuple(item) for item in s]
    return urllib.parse.urlencode(pairs, doseq=False)
|
||||
|
||||
|
||||
def urldecode(s):
    """
    Takes a urlencoded string and returns a list of (key, value) tuples.

    Keys with an empty value are kept in the result.
    """
    return urllib.parse.parse_qsl(s, keep_blank_values=True)
|
@ -8,9 +8,6 @@ import inspect
|
||||
|
||||
import six
|
||||
|
||||
from six.moves import urllib
|
||||
|
||||
|
||||
def always_bytes(unicode_or_bytes, *encode_args):
|
||||
if isinstance(unicode_or_bytes, six.text_type):
|
||||
return unicode_or_bytes.encode(*encode_args)
|
||||
@ -188,71 +185,6 @@ def is_valid_port(port):
|
||||
return 0 <= port <= 65535
|
||||
|
||||
|
||||
# PY2 workaround
|
||||
def decode_parse_result(result, enc):
|
||||
if hasattr(result, "decode"):
|
||||
return result.decode(enc)
|
||||
else:
|
||||
return urllib.parse.ParseResult(*[x.decode(enc) for x in result])
|
||||
|
||||
|
||||
# PY2 workaround
|
||||
def encode_parse_result(result, enc):
|
||||
if hasattr(result, "encode"):
|
||||
return result.encode(enc)
|
||||
else:
|
||||
return urllib.parse.ParseResult(*[x.encode(enc) for x in result])
|
||||
|
||||
|
||||
def parse_url(url):
|
||||
"""
|
||||
URL-parsing function that checks that
|
||||
- port is an integer 0-65535
|
||||
- host is a valid IDNA-encoded hostname with no null-bytes
|
||||
- path is valid ASCII
|
||||
|
||||
Args:
|
||||
A URL (as bytes or as unicode)
|
||||
|
||||
Returns:
|
||||
A (scheme, host, port, path) tuple
|
||||
|
||||
Raises:
|
||||
ValueError, if the URL is not properly formatted.
|
||||
"""
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
|
||||
if not parsed.hostname:
|
||||
raise ValueError("No hostname given")
|
||||
|
||||
if isinstance(url, six.binary_type):
|
||||
host = parsed.hostname
|
||||
|
||||
# this should not raise a ValueError,
|
||||
# but we try to be very forgiving here and accept just everything.
|
||||
# decode_parse_result(parsed, "ascii")
|
||||
else:
|
||||
host = parsed.hostname.encode("idna")
|
||||
parsed = encode_parse_result(parsed, "ascii")
|
||||
|
||||
port = parsed.port
|
||||
if not port:
|
||||
port = 443 if parsed.scheme == b"https" else 80
|
||||
|
||||
full_path = urllib.parse.urlunparse(
|
||||
(b"", b"", parsed.path, parsed.params, parsed.query, parsed.fragment)
|
||||
)
|
||||
if not full_path.startswith(b"/"):
|
||||
full_path = b"/" + full_path
|
||||
|
||||
if not is_valid_host(host):
|
||||
raise ValueError("Invalid Host")
|
||||
if not is_valid_port(port):
|
||||
raise ValueError("Invalid Port")
|
||||
|
||||
return parsed.scheme, host, port, full_path
|
||||
|
||||
|
||||
def get_header_tokens(headers, key):
|
||||
"""
|
||||
Retrieve all tokens for a header key. A number of different headers
|
||||
@ -278,33 +210,6 @@ def hostport(scheme, host, port):
|
||||
return "%s:%d" % (host, port)
|
||||
|
||||
|
||||
def unparse_url(scheme, host, port, path=""):
|
||||
"""
|
||||
Returns a URL string, constructed from the specified components.
|
||||
|
||||
Args:
|
||||
All args must be str.
|
||||
"""
|
||||
if path == "*":
|
||||
path = ""
|
||||
return "%s://%s%s" % (scheme, hostport(scheme, host, port), path)
|
||||
|
||||
|
||||
def urlencode(s):
|
||||
"""
|
||||
Takes a list of (key, value) tuples and returns a urlencoded string.
|
||||
"""
|
||||
s = [tuple(i) for i in s]
|
||||
return urllib.parse.urlencode(s, False)
|
||||
|
||||
|
||||
def urldecode(s):
|
||||
"""
|
||||
Takes a urlencoded string and returns a list of (key, value) tuples.
|
||||
"""
|
||||
return urllib.parse.parse_qsl(s, keep_blank_values=True)
|
||||
|
||||
|
||||
def parse_content_type(c):
|
||||
"""
|
||||
A simple parser for content-type values. Returns a (type, subtype,
|
||||
|
@ -1,8 +1,8 @@
|
||||
from mitmproxy.exceptions import ContentViewException
|
||||
from netlib.http import Headers
|
||||
from netlib.odict import ODict
|
||||
import netlib.utils
|
||||
from netlib import encoding
|
||||
from netlib.http import url
|
||||
|
||||
import mitmproxy.contentviews as cv
|
||||
from . import tutils
|
||||
@ -60,10 +60,10 @@ class TestContentView:
|
||||
assert f[0] == "Query"
|
||||
|
||||
def test_view_urlencoded(self):
|
||||
d = netlib.utils.urlencode([("one", "two"), ("three", "four")])
|
||||
d = url.urlencode([("one", "two"), ("three", "four")])
|
||||
v = cv.ViewURLEncoded()
|
||||
assert v(d)
|
||||
d = netlib.utils.urlencode([("adsfa", "")])
|
||||
d = url.urlencode([("adsfa", "")])
|
||||
v = cv.ViewURLEncoded()
|
||||
assert v(d)
|
||||
|
||||
|
65
test/netlib/http/test_url.py
Normal file
65
test/netlib/http/test_url.py
Normal file
@ -0,0 +1,65 @@
|
||||
from netlib import tutils
|
||||
from netlib.http import url
|
||||
|
||||
def test_parse_url():
    # An empty URL has no hostname.
    with tutils.raises(ValueError):
        url.parse_url("")

    s, h, po, pa = url.parse_url(b"http://foo.com:8888/test")
    assert s == b"http"
    assert h == b"foo.com"
    assert po == 8888
    assert pa == b"/test"

    s, h, po, pa = url.parse_url("http://foo/bar")
    assert s == b"http"
    assert h == b"foo"
    assert po == 80
    assert pa == b"/bar"

    # Credentials in the authority do not end up in the host.
    s, h, po, pa = url.parse_url(b"http://user:pass@foo/bar")
    assert s == b"http"
    assert h == b"foo"
    assert po == 80
    assert pa == b"/bar"

    # A missing path is normalised to "/".
    s, h, po, pa = url.parse_url(b"http://foo")
    assert pa == b"/"

    # The default port follows the scheme.
    s, h, po, pa = url.parse_url(b"https://foo")
    assert po == 443

    with tutils.raises(ValueError):
        url.parse_url(b"https://foo:bar")

    # Invalid IDNA
    with tutils.raises(ValueError):
        url.parse_url("http://\xfafoo")
    # Invalid PATH
    with tutils.raises(ValueError):
        url.parse_url("http:/\xc6/localhost:56121")
    # Null byte in host
    with tutils.raises(ValueError):
        url.parse_url("http://foo\0")
    # Port out of range
    # NOTE(review): this relies on urlparse reporting an out-of-range port as
    # None; Python 3.6+ raises ValueError on access instead - confirm which
    # interpreters this test has to pass on.
    _, _, port, _ = url.parse_url("http://foo:999999")
    assert port == 80
    # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
    with tutils.raises(ValueError):
        url.parse_url('http://lo[calhost')
|
||||
|
||||
|
||||
def test_unparse_url():
    # The port is only spelled out when it is not the scheme's default.
    assert url.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99"
    assert url.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
    assert url.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80"
    assert url.unparse_url("https", "foo.com", 443, "") == "https://foo.com"
|
||||
|
||||
|
||||
def test_urlencode():
    # Pin the exact output rather than mere truthiness.
    assert url.urlencode([('foo', 'bar')]) == "foo=bar"
|
||||
|
||||
|
||||
def test_urldecode():
    s = "one=two&three=four"
    decoded = url.urldecode(s)
    assert len(decoded) == 2
    # Check the content too, not only the number of pairs.
    assert list(decoded) == [("one", "two"), ("three", "four")]
|
@ -38,70 +38,6 @@ def test_pretty_size():
|
||||
assert utils.pretty_size(1024 * 1024) == "1MB"
|
||||
|
||||
|
||||
def test_parse_url():
|
||||
with tutils.raises(ValueError):
|
||||
utils.parse_url("")
|
||||
|
||||
s, h, po, pa = utils.parse_url(b"http://foo.com:8888/test")
|
||||
assert s == b"http"
|
||||
assert h == b"foo.com"
|
||||
assert po == 8888
|
||||
assert pa == b"/test"
|
||||
|
||||
s, h, po, pa = utils.parse_url("http://foo/bar")
|
||||
assert s == b"http"
|
||||
assert h == b"foo"
|
||||
assert po == 80
|
||||
assert pa == b"/bar"
|
||||
|
||||
s, h, po, pa = utils.parse_url(b"http://user:pass@foo/bar")
|
||||
assert s == b"http"
|
||||
assert h == b"foo"
|
||||
assert po == 80
|
||||
assert pa == b"/bar"
|
||||
|
||||
s, h, po, pa = utils.parse_url(b"http://foo")
|
||||
assert pa == b"/"
|
||||
|
||||
s, h, po, pa = utils.parse_url(b"https://foo")
|
||||
assert po == 443
|
||||
|
||||
with tutils.raises(ValueError):
|
||||
utils.parse_url(b"https://foo:bar")
|
||||
|
||||
# Invalid IDNA
|
||||
with tutils.raises(ValueError):
|
||||
utils.parse_url("http://\xfafoo")
|
||||
# Invalid PATH
|
||||
with tutils.raises(ValueError):
|
||||
utils.parse_url("http:/\xc6/localhost:56121")
|
||||
# Null byte in host
|
||||
with tutils.raises(ValueError):
|
||||
utils.parse_url("http://foo\0")
|
||||
# Port out of range
|
||||
_, _, port, _ = utils.parse_url("http://foo:999999")
|
||||
assert port == 80
|
||||
# Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
|
||||
with tutils.raises(ValueError):
|
||||
utils.parse_url('http://lo[calhost')
|
||||
|
||||
|
||||
def test_unparse_url():
|
||||
assert utils.unparse_url("http", "foo.com", 99, "") == "http://foo.com:99"
|
||||
assert utils.unparse_url("http", "foo.com", 80, "/bar") == "http://foo.com/bar"
|
||||
assert utils.unparse_url("https", "foo.com", 80, "") == "https://foo.com:80"
|
||||
assert utils.unparse_url("https", "foo.com", 443, "") == "https://foo.com"
|
||||
|
||||
|
||||
def test_urlencode():
|
||||
assert utils.urlencode([('foo', 'bar')])
|
||||
|
||||
|
||||
def test_urldecode():
|
||||
s = "one=two&three=four"
|
||||
assert len(utils.urldecode(s)) == 2
|
||||
|
||||
|
||||
def test_get_header_tokens():
|
||||
headers = Headers()
|
||||
assert utils.get_header_tokens(headers, "foo") == []
|
||||
|
Loading…
Reference in New Issue
Block a user