Extract url functions from netlib.utils and move to netlib.http.url

This commit is contained in:
Aldo Cortesi 2016-05-31 18:42:56 +12:00
parent 08fbe6f111
commit 4e6c9c4e93
12 changed files with 186 additions and 182 deletions

View File

@ -6,7 +6,7 @@ import base64
import configargparse
from netlib.tcp import Address, sslversion_choices
import netlib.utils
import netlib.http.url
from . import filt, utils, version
from .proxy import config
@ -105,7 +105,7 @@ def parse_setheader(s):
def parse_server_spec(url):
try:
p = netlib.utils.parse_url(url)
p = netlib.http.url.parse_url(url)
if p[0] not in ("http", "https"):
raise ValueError()
except ValueError:

View File

@ -1,7 +1,7 @@
from __future__ import absolute_import
import urwid
import netlib.utils
import netlib.http.url
from . import common, signals
@ -343,7 +343,7 @@ class FlowListBox(urwid.ListBox):
)
def new_request(self, url, method):
parts = netlib.utils.parse_url(str(url))
parts = netlib.http.url.parse_url(str(url))
if not parts:
signals.status_message.send(message="Invalid Url")
return

View File

@ -27,7 +27,8 @@ import html2text
import six
from netlib.odict import ODict
from netlib import encoding
from netlib.utils import clean_bin, hexdump, urldecode, multipartdecode, parse_content_type
from netlib.http import url
from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type
from . import utils
from .exceptions import ContentViewException
from .contrib import jsbeautifier
@ -257,7 +258,7 @@ class ViewURLEncoded(View):
content_types = ["application/x-www-form-urlencoded"]
def __call__(self, data, **metadata):
d = urldecode(data)
d = url.urldecode(data)
return "URLEncoded form", format_dict(ODict(d))

View File

@ -14,8 +14,8 @@ from hyperframe.frame import PriorityFrame
from netlib.tcp import ssl_read_select
from netlib.exceptions import HttpException
from netlib.http import Headers
from netlib.utils import parse_url
from netlib.http.http2 import frame
import netlib.http.url
from .base import Layer
from .http import _HttpTransmissionLayer, HttpLayer
@ -320,7 +320,7 @@ class Http2SingleStreamLayer(_HttpTransmissionLayer, threading.Thread):
else: # pragma: no cover
first_line_format = "absolute"
# FIXME: verify if path or :host contains what we need
scheme, host, port, _ = parse_url(path)
scheme, host, port, _ = netlib.http.url.parse_url(path)
if authority:
host, _, port = authority.partition(':')

View File

@ -6,6 +6,7 @@ import re
from ... import utils
from ...exceptions import HttpReadDisconnect, HttpSyntaxException, HttpException, TcpDisconnect
from .. import Request, Response, Headers
from .. import url
def read_request(rfile, body_size_limit=None):
@ -240,7 +241,7 @@ def _read_request_line(rfile):
scheme, path = None, None
else:
form = "absolute"
scheme, host, port, path = utils.parse_url(path)
scheme, host, port, path = url.parse_url(path)
_check_http_version(http_version)
except ValueError:

View File

@ -6,7 +6,7 @@ import hyperframe.frame
from hpack.hpack import Encoder, Decoder
from ... import utils
from .. import Headers, Response, Request
from .. import Headers, Response, Request, url
from . import frame
@ -118,7 +118,7 @@ class HTTP2Protocol(object):
else:
first_line_format = "absolute"
# FIXME: verify if path or :host contains what we need
scheme, host, port, _ = utils.parse_url(path)
scheme, host, port, _ = url.parse_url(path)
scheme = scheme.decode('ascii')
host = host.decode('ascii')

View File

@ -6,6 +6,7 @@ import six
from six.moves import urllib
from netlib import utils
import netlib.http.url
from . import cookies
from .. import encoding
from ..multidict import MultiDictView
@ -179,11 +180,11 @@ class Request(Message):
"""
if self.first_line_format == "authority":
return "%s:%d" % (self.host, self.port)
return utils.unparse_url(self.scheme, self.host, self.port, self.path)
return netlib.http.url.unparse_url(self.scheme, self.host, self.port, self.path)
@url.setter
def url(self, url):
self.scheme, self.host, self.port, self.path = utils.parse_url(url)
self.scheme, self.host, self.port, self.path = netlib.http.url.parse_url(url)
def _parse_host_header(self):
"""Extract the host and port from Host header"""
@ -219,7 +220,7 @@ class Request(Message):
"""
if self.first_line_format == "authority":
return "%s:%d" % (self.pretty_host, self.port)
return utils.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
return netlib.http.url.unparse_url(self.scheme, self.pretty_host, self.port, self.path)
@property
def query(self):
@ -234,12 +235,12 @@ class Request(Message):
def _get_query(self):
_, _, _, _, query, _ = urllib.parse.urlparse(self.url)
return tuple(utils.urldecode(query))
return tuple(netlib.http.url.urldecode(query))
def _set_query(self, value):
query = utils.urlencode(value)
query = netlib.http.url.urlencode(value)
scheme, netloc, path, params, _, fragment = urllib.parse.urlparse(self.url)
_, _, _, self.path = utils.parse_url(
_, _, _, self.path = netlib.http.url.parse_url(
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
@query.setter
@ -287,7 +288,7 @@ class Request(Message):
components = map(lambda x: urllib.parse.quote(x, safe=""), components)
path = "/" + "/".join(components)
scheme, netloc, _, params, query, fragment = urllib.parse.urlparse(self.url)
_, _, _, self.path = utils.parse_url(
_, _, _, self.path = netlib.http.url.parse_url(
urllib.parse.urlunparse([scheme, netloc, path, params, query, fragment]))
def anticache(self):
@ -339,7 +340,7 @@ class Request(Message):
def _get_urlencoded_form(self):
is_valid_content_type = "application/x-www-form-urlencoded" in self.headers.get("content-type", "").lower()
if is_valid_content_type:
return tuple(utils.urldecode(self.content))
return tuple(netlib.http.url.urldecode(self.content))
return ()
def _set_urlencoded_form(self, value):
@ -348,7 +349,7 @@ class Request(Message):
This will overwrite the existing content if there is one.
"""
self.headers["content-type"] = "application/x-www-form-urlencoded"
self.content = utils.urlencode(value)
self.content = netlib.http.url.urlencode(value)
@urlencoded_form.setter
def urlencoded_form(self, value):

95
netlib/http/url.py Normal file
View File

@ -0,0 +1,95 @@
import six
from six.moves import urllib
from .. import utils
# PY2 workaround
def decode_parse_result(result, enc):
    """
    Decode all components of a URL parse result using the given encoding.

    Objects exposing their own ``decode`` method are decoded through it.
    Anything else is iterated component by component, each one is decoded,
    and the pieces are repacked into a ``urllib.parse.ParseResult``.
    """
    if not hasattr(result, "decode"):
        return urllib.parse.ParseResult(*(part.decode(enc) for part in result))
    return result.decode(enc)
# PY2 workaround
def encode_parse_result(result, enc):
    """
    Encode all components of a URL parse result using the given encoding.

    Objects exposing their own ``encode`` method are encoded through it.
    Anything else is iterated component by component, each one is encoded,
    and the pieces are repacked into a ``urllib.parse.ParseResult``.
    """
    if not hasattr(result, "encode"):
        return urllib.parse.ParseResult(*(part.encode(enc) for part in result))
    return result.encode(enc)
def parse_url(url):
    """
    Split a URL into (scheme, host, port, path), validating the pieces.

    Checks performed:
        - a hostname must be present
        - for text input, the host must be IDNA-encodable and the
          remaining components ASCII-encodable
        - the host must be accepted by utils.is_valid_host
        - the port must be accepted by utils.is_valid_port

    Args:
        A URL (as bytes or as unicode)

    Returns:
        A (scheme, host, port, path) tuple. When the parsed port is falsy
        the port is 443 for the b"https" scheme and 80 otherwise. The path
        also carries params, query and fragment and always starts with b"/".

    Raises:
        ValueError, if the URL is not properly formatted.
    """
    parts = urllib.parse.urlparse(url)
    hostname = parts.hostname
    if not hostname:
        raise ValueError("No hostname given")

    if not isinstance(url, six.binary_type):
        # Text input: the host goes through IDNA, everything else must be
        # plain ASCII. Encoding failures surface to the caller.
        host = hostname.encode("idna")
        parts = encode_parse_result(parts, "ascii")
    else:
        # Bytes input is deliberately taken as-is; no ASCII check is done
        # so that unusual URLs are still accepted.
        host = hostname

    port = parts.port or (443 if parts.scheme == b"https" else 80)

    # Rebuild everything after the authority into a single path value.
    path_components = (
        b"", b"", parts.path, parts.params, parts.query, parts.fragment
    )
    full_path = urllib.parse.urlunparse(path_components)
    if not full_path.startswith(b"/"):
        full_path = b"/" + full_path

    if not utils.is_valid_host(host):
        raise ValueError("Invalid Host")
    if not utils.is_valid_port(port):
        raise ValueError("Invalid Port")

    return parts.scheme, host, port, full_path
def unparse_url(scheme, host, port, path=""):
    """
    Build a URL string from the given components.

    The host/port portion is rendered by utils.hostport. A path of "*"
    is rendered as an empty path.

    Args:
        scheme, host and path are strings; port is handed to
        utils.hostport unchanged (the tests call this with an int).
    """
    rendered_path = "" if path == "*" else path
    authority = utils.hostport(scheme, host, port)
    return "%s://%s%s" % (scheme, authority, rendered_path)
def urlencode(s):
    """
    Serialize an iterable of (key, value) pairs into a urlencoded string.

    Every item is converted to a tuple first, so list pairs are accepted
    as well. Sequence values are not expanded (doseq is disabled).
    """
    pairs = list(map(tuple, s))
    return urllib.parse.urlencode(pairs, doseq=False)
def urldecode(s):
    """
    Parse a urlencoded string into a list of (key, value) tuples.

    Keys with an empty value are kept in the result.
    """
    pairs = urllib.parse.parse_qsl(s, keep_blank_values=True)
    return pairs

View File

@ -8,9 +8,6 @@ import inspect
import six
from six.moves import urllib
def always_bytes(unicode_or_bytes, *encode_args):
if isinstance(unicode_or_bytes, six.text_type):
return unicode_or_bytes.encode(*encode_args)
@ -188,71 +185,6 @@ def is_valid_port(port):
return 0 <= port <= 65535
# PY2 workaround
def decode_parse_result(result, enc):
    """
    Decode all components of a URL parse result using the given encoding.

    Objects exposing their own ``decode`` method are decoded through it.
    Anything else is iterated component by component, each one is decoded,
    and the pieces are repacked into a ``urllib.parse.ParseResult``.
    """
    if not hasattr(result, "decode"):
        return urllib.parse.ParseResult(*(part.decode(enc) for part in result))
    return result.decode(enc)
# PY2 workaround
def encode_parse_result(result, enc):
    """
    Encode all components of a URL parse result using the given encoding.

    Objects exposing their own ``encode`` method are encoded through it.
    Anything else is iterated component by component, each one is encoded,
    and the pieces are repacked into a ``urllib.parse.ParseResult``.
    """
    if not hasattr(result, "encode"):
        return urllib.parse.ParseResult(*(part.encode(enc) for part in result))
    return result.encode(enc)
def parse_url(url):
    """
    Split a URL into (scheme, host, port, path), validating the pieces.

    Checks performed:
        - a hostname must be present
        - for text input, the host must be IDNA-encodable and the
          remaining components ASCII-encodable
        - the host must be accepted by is_valid_host
        - the port must be accepted by is_valid_port

    Args:
        A URL (as bytes or as unicode)

    Returns:
        A (scheme, host, port, path) tuple. When the parsed port is falsy
        the port is 443 for the b"https" scheme and 80 otherwise. The path
        also carries params, query and fragment and always starts with b"/".

    Raises:
        ValueError, if the URL is not properly formatted.
    """
    parts = urllib.parse.urlparse(url)
    hostname = parts.hostname
    if not hostname:
        raise ValueError("No hostname given")

    if not isinstance(url, six.binary_type):
        # Text input: the host goes through IDNA, everything else must be
        # plain ASCII. Encoding failures surface to the caller.
        host = hostname.encode("idna")
        parts = encode_parse_result(parts, "ascii")
    else:
        # Bytes input is deliberately taken as-is; no ASCII check is done
        # so that unusual URLs are still accepted.
        host = hostname

    port = parts.port or (443 if parts.scheme == b"https" else 80)

    # Rebuild everything after the authority into a single path value.
    path_components = (
        b"", b"", parts.path, parts.params, parts.query, parts.fragment
    )
    full_path = urllib.parse.urlunparse(path_components)
    if not full_path.startswith(b"/"):
        full_path = b"/" + full_path

    if not is_valid_host(host):
        raise ValueError("Invalid Host")
    if not is_valid_port(port):
        raise ValueError("Invalid Port")

    return parts.scheme, host, port, full_path
def get_header_tokens(headers, key):
"""
Retrieve all tokens for a header key. A number of different headers
@ -278,33 +210,6 @@ def hostport(scheme, host, port):
return "%s:%d" % (host, port)
def unparse_url(scheme, host, port, path=""):
    """
    Build a URL string from the given components.

    The host/port portion is rendered by hostport. A path of "*" is
    rendered as an empty path.

    Args:
        scheme, host and path are strings; port is handed to hostport
        unchanged, which formats it with "%d".
    """
    rendered_path = "" if path == "*" else path
    authority = hostport(scheme, host, port)
    return "%s://%s%s" % (scheme, authority, rendered_path)
def urlencode(s):
    """
    Serialize an iterable of (key, value) pairs into a urlencoded string.

    Every item is converted to a tuple first, so list pairs are accepted
    as well. Sequence values are not expanded (doseq is disabled).
    """
    pairs = list(map(tuple, s))
    return urllib.parse.urlencode(pairs, doseq=False)
def urldecode(s):
    """
    Parse a urlencoded string into a list of (key, value) tuples.

    Keys with an empty value are kept in the result.
    """
    pairs = urllib.parse.parse_qsl(s, keep_blank_values=True)
    return pairs
def parse_content_type(c):
"""
A simple parser for content-type values. Returns a (type, subtype,

View File

@ -1,8 +1,8 @@
from mitmproxy.exceptions import ContentViewException
from netlib.http import Headers
from netlib.odict import ODict
import netlib.utils
from netlib import encoding
from netlib.http import url
import mitmproxy.contentviews as cv
from . import tutils
@ -60,10 +60,10 @@ class TestContentView:
assert f[0] == "Query"
def test_view_urlencoded(self):
d = netlib.utils.urlencode([("one", "two"), ("three", "four")])
d = url.urlencode([("one", "two"), ("three", "four")])
v = cv.ViewURLEncoded()
assert v(d)
d = netlib.utils.urlencode([("adsfa", "")])
d = url.urlencode([("adsfa", "")])
v = cv.ViewURLEncoded()
assert v(d)

View File

@ -0,0 +1,65 @@
from netlib import tutils
from netlib.http import url
def test_parse_url():
    """Exercise url.parse_url with well-formed, defaulted and malformed URLs."""
    # An empty URL carries no hostname.
    with tutils.raises(ValueError):
        url.parse_url("")

    # Bytes input with an explicit port.
    result = url.parse_url(b"http://foo.com:8888/test")
    assert result == (b"http", b"foo.com", 8888, b"/test")

    # Text input comes back as bytes, with the default http port.
    result = url.parse_url("http://foo/bar")
    assert result == (b"http", b"foo", 80, b"/bar")

    # Credentials are not part of the returned host.
    result = url.parse_url(b"http://user:pass@foo/bar")
    assert result == (b"http", b"foo", 80, b"/bar")

    # A missing path becomes "/", and https defaults to port 443.
    assert url.parse_url(b"http://foo")[3] == b"/"
    assert url.parse_url(b"https://foo")[2] == 443

    malformed = (
        b"https://foo:bar",            # non-numeric port
        "http://\xfafoo",              # invalid IDNA
        "http:/\xc6/localhost:56121",  # invalid path
        "http://foo\0",                # null byte in host
    )
    for bad_url in malformed:
        with tutils.raises(ValueError):
            url.parse_url(bad_url)

    # Port out of range: the test expects the default port to be used.
    assert url.parse_url("http://foo:999999")[2] == 80

    # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
    with tutils.raises(ValueError):
        url.parse_url('http://lo[calhost')
def test_unparse_url():
    """Check url.unparse_url against a table of (arguments, expected URL)."""
    cases = [
        (("http", "foo.com", 99, ""), "http://foo.com:99"),
        (("http", "foo.com", 80, "/bar"), "http://foo.com/bar"),
        (("https", "foo.com", 80, ""), "https://foo.com:80"),
        (("https", "foo.com", 443, ""), "https://foo.com"),
    ]
    for args, expected in cases:
        assert url.unparse_url(*args) == expected
def test_urlencode():
    """
    url.urlencode must produce the exact urlencoded string.

    The previous check only asserted truthiness, which any non-empty
    return value would satisfy.
    """
    assert url.urlencode([('foo', 'bar')]) == "foo=bar"
    assert url.urlencode([('one', 'two'), ('three', 'four')]) == "one=two&three=four"
    # Blank values are emitted with a trailing "=".
    assert url.urlencode([('adsfa', '')]) == "adsfa="
def test_urldecode():
    """
    url.urldecode must return the decoded (key, value) pairs in order.

    The previous check only asserted the number of pairs, not their content.
    """
    s = "one=two&three=four"
    decoded = url.urldecode(s)
    assert len(decoded) == 2
    assert decoded == [("one", "two"), ("three", "four")]
    # urldecode parses with keep_blank_values=True, so empty values survive.
    assert url.urldecode("one=&two=2") == [("one", ""), ("two", "2")]

View File

@ -38,70 +38,6 @@ def test_pretty_size():
assert utils.pretty_size(1024 * 1024) == "1MB"
def test_parse_url():
    """Exercise utils.parse_url with well-formed, defaulted and malformed URLs."""
    # An empty URL carries no hostname.
    with tutils.raises(ValueError):
        utils.parse_url("")

    # Bytes input with an explicit port.
    result = utils.parse_url(b"http://foo.com:8888/test")
    assert result == (b"http", b"foo.com", 8888, b"/test")

    # Text input comes back as bytes, with the default http port.
    result = utils.parse_url("http://foo/bar")
    assert result == (b"http", b"foo", 80, b"/bar")

    # Credentials are not part of the returned host.
    result = utils.parse_url(b"http://user:pass@foo/bar")
    assert result == (b"http", b"foo", 80, b"/bar")

    # A missing path becomes "/", and https defaults to port 443.
    assert utils.parse_url(b"http://foo")[3] == b"/"
    assert utils.parse_url(b"https://foo")[2] == 443

    malformed = (
        b"https://foo:bar",            # non-numeric port
        "http://\xfafoo",              # invalid IDNA
        "http:/\xc6/localhost:56121",  # invalid path
        "http://foo\0",                # null byte in host
    )
    for bad_url in malformed:
        with tutils.raises(ValueError):
            utils.parse_url(bad_url)

    # Port out of range: the test expects the default port to be used.
    assert utils.parse_url("http://foo:999999")[2] == 80

    # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
    with tutils.raises(ValueError):
        utils.parse_url('http://lo[calhost')
def test_unparse_url():
    """Check utils.unparse_url against a table of (arguments, expected URL)."""
    cases = [
        (("http", "foo.com", 99, ""), "http://foo.com:99"),
        (("http", "foo.com", 80, "/bar"), "http://foo.com/bar"),
        (("https", "foo.com", 80, ""), "https://foo.com:80"),
        (("https", "foo.com", 443, ""), "https://foo.com"),
    ]
    for args, expected in cases:
        assert utils.unparse_url(*args) == expected
def test_urlencode():
    """
    utils.urlencode must produce the exact urlencoded string.

    The previous check only asserted truthiness, which any non-empty
    return value would satisfy.
    """
    assert utils.urlencode([('foo', 'bar')]) == "foo=bar"
    assert utils.urlencode([('one', 'two'), ('three', 'four')]) == "one=two&three=four"
    # Blank values are emitted with a trailing "=".
    assert utils.urlencode([('adsfa', '')]) == "adsfa="
def test_urldecode():
    """
    utils.urldecode must return the decoded (key, value) pairs in order.

    The previous check only asserted the number of pairs, not their content.
    """
    s = "one=two&three=four"
    decoded = utils.urldecode(s)
    assert len(decoded) == 2
    assert decoded == [("one", "two"), ("three", "four")]
    # urldecode parses with keep_blank_values=True, so empty values survive.
    assert utils.urldecode("one=&two=2") == [("one", ""), ("two", "2")]
def test_get_header_tokens():
headers = Headers()
assert utils.get_header_tokens(headers, "foo") == []