Move utils.multipartdecode to http.multipart.decode

Also move utils.parse_content_type to http.headers.parse_content_type
2025-01-30 14:58:38 +00:00 · 2016-05-31 19:07:55 +12:00 · 2016-05-31 19:07:55 +12:00 · ec34cae618
commit ec34cae618
parent 15b2374ef9
9 changed files with 101 additions and 95 deletions
--- a/mitmproxy/contentviews.py
+++ b/mitmproxy/contentviews.py
@ -27,8 +27,9 @@ import html2text
 import six
 from netlib.odict import ODict
 from netlib import encoding
-from netlib.http import url
-from netlib.utils import clean_bin, hexdump, multipartdecode, parse_content_type
+import netlib.http.headers
+from netlib.http import url, multipart
+from netlib.utils import clean_bin, hexdump
 from . import utils
 from .exceptions import ContentViewException
 from .contrib import jsbeautifier
@ -121,7 +122,7 @@ class ViewAuto(View):
        headers = metadata.get("headers", {})
        ctype = headers.get("content-type")
        if data and ctype:
-            ct = parse_content_type(ctype) if ctype else None
+            ct = netlib.http.headers.parse_content_type(ctype) if ctype else None
            ct = "%s/%s" % (ct[0], ct[1])
            if ct in content_types_map:
                return content_types_map[ct][0](data, **metadata)
@ -275,7 +276,7 @@ class ViewMultipart(View):

    def __call__(self, data, **metadata):
        headers = metadata.get("headers", {})
-        v = multipartdecode(headers, data)
+        v = multipart.decode(headers, data)
        if v:
            return "Multipart form", self._format(v)

--- a/mitmproxy/flow/export.py
+++ b/mitmproxy/flow/export.py
@ -5,7 +5,7 @@ from textwrap import dedent
 from six.moves.urllib.parse import quote, quote_plus

 import netlib.http
-from netlib.utils import parse_content_type
+import netlib.http.headers


 def curl_command(flow):
@ -88,7 +88,7 @@ def raw_request(flow):

 def is_json(headers, content):
    if headers:
-        ct = parse_content_type(headers.get("content-type", ""))
+        ct = netlib.http.headers.parse_content_type(headers.get("content-type", ""))
        if ct and "%s/%s" % (ct[0], ct[1]) == "application/json":
            try:
                return json.loads(content)
--- a/netlib/http/headers.py
+++ b/netlib/http/headers.py
@ -175,3 +175,30 @@ class Headers(MultiDict):
            fields.append([name, value])
        self.fields = fields
        return replacements
+
+        
+def parse_content_type(c):
+    """
+        A simple parser for content-type values. Returns a (type, subtype,
+        parameters) tuple, where type and subtype are strings, and parameters
+        is a dict. If the string could not be parsed, return None.
+
+        E.g. the following string:
+
+            text/html; charset=UTF-8
+
+        Returns:
+
+            ("text", "html", {"charset": "UTF-8"})
+    """
+    parts = c.split(";", 1)
+    ts = parts[0].split("/", 1)
+    if len(ts) != 2:
+        return None
+    d = {}
+    if len(parts) == 2:
+        for i in parts[1].split(";"):
+            clause = i.split("=", 1)
+            if len(clause) == 2:
+                d[clause[0].strip()] = clause[1].strip()
+    return ts[0].lower(), ts[1].lower(), d
--- a/netlib/http/multipart.py
+++ b/netlib/http/multipart.py
@ -0,0 +1,32 @@
+import re
+
+from . import headers
+
+
+def decode(hdrs, content):
+    """
+        Takes a multipart boundary encoded string and returns list of (key, value) tuples.
+    """
+    v = hdrs.get("content-type")
+    if v:
+        v = headers.parse_content_type(v)
+        if not v:
+            return []
+        try:
+            boundary = v[2]["boundary"].encode("ascii")
+        except (KeyError, UnicodeError):
+            return []
+
+        rx = re.compile(br'\bname="([^"]+)"')
+        r = []
+
+        for i in content.split(b"--" + boundary):
+            parts = i.splitlines()
+            if len(parts) > 1 and parts[0][0:2] != b"--":
+                match = rx.search(parts[1])
+                if match:
+                    key = match.group(1)
+                    value = b"".join(parts[3 + parts[2:].index(b""):])
+                    r.append((key, value))
+        return r
+    return []
--- a/netlib/http/request.py
+++ b/netlib/http/request.py
@ -7,6 +7,7 @@ from six.moves import urllib

 from netlib import utils
 import netlib.http.url
+from netlib.http import multipart
 from . import cookies
 from .. import encoding
 from ..multidict import MultiDictView
@ -369,7 +370,7 @@ class Request(Message):
    def _get_multipart_form(self):
        is_valid_content_type = "multipart/form-data" in self.headers.get("content-type", "").lower()
        if is_valid_content_type:
-            return utils.multipartdecode(self.headers, self.content)
+            return multipart.decode(self.headers, self.content)
        return ()

    def _set_multipart_form(self, value):
--- a/netlib/utils.py
+++ b/netlib/utils.py
@ -190,62 +190,6 @@ def hostport(scheme, host, port):
            return "%s:%d" % (host, port)


-def parse_content_type(c):
-    """
-        A simple parser for content-type values. Returns a (type, subtype,
-        parameters) tuple, where type and subtype are strings, and parameters
-        is a dict. If the string could not be parsed, return None.
-
-        E.g. the following string:
-
-            text/html; charset=UTF-8
-
-        Returns:
-
-            ("text", "html", {"charset": "UTF-8"})
-    """
-    parts = c.split(";", 1)
-    ts = parts[0].split("/", 1)
-    if len(ts) != 2:
-        return None
-    d = {}
-    if len(parts) == 2:
-        for i in parts[1].split(";"):
-            clause = i.split("=", 1)
-            if len(clause) == 2:
-                d[clause[0].strip()] = clause[1].strip()
-    return ts[0].lower(), ts[1].lower(), d
-
-
-def multipartdecode(headers, content):
-    """
-        Takes a multipart boundary encoded string and returns list of (key, value) tuples.
-    """
-    v = headers.get("content-type")
-    if v:
-        v = parse_content_type(v)
-        if not v:
-            return []
-        try:
-            boundary = v[2]["boundary"].encode("ascii")
-        except (KeyError, UnicodeError):
-            return []
-
-        rx = re.compile(br'\bname="([^"]+)"')
-        r = []
-
-        for i in content.split(b"--" + boundary):
-            parts = i.splitlines()
-            if len(parts) > 1 and parts[0][0:2] != b"--":
-                match = rx.search(parts[1])
-                if match:
-                    key = match.group(1)
-                    value = b"".join(parts[3 + parts[2:].index(b""):])
-                    r.append((key, value))
-        return r
-    return []
-
-
 def safe_subn(pattern, repl, target, *args, **kwargs):
    """
        There are Unicode conversion problems with re.subn. We try to smooth
--- a/test/netlib/http/test_headers.py
+++ b/test/netlib/http/test_headers.py
@ -1,4 +1,5 @@
 from netlib.http import Headers
+from netlib.http.headers import parse_content_type
 from netlib.tutils import raises


@ -72,3 +73,12 @@ class TestHeaders(object):
        replacements = headers.replace(r"Host: ", "X-Host ")
        assert replacements == 0
        assert headers["Host"] == "example.com"
+
+
+def test_parse_content_type():
+    p = parse_content_type
+    assert p("text/html") == ("text", "html", {})
+    assert p("text") is None
+
+    v = p("text/html; charset=UTF-8")
+    assert v == ('text', 'html', {'charset': 'UTF-8'})
--- a/test/netlib/http/test_multipart.py
+++ b/test/netlib/http/test_multipart.py
@ -0,0 +1,23 @@
+from netlib.http import Headers
+from netlib.http import multipart
+
+def test_decode():
+    boundary = 'somefancyboundary'
+    headers = Headers(
+        content_type='multipart/form-data; boundary=' + boundary
+    )
+    content = (
+        "--{0}\n"
+        "Content-Disposition: form-data; name=\"field1\"\n\n"
+        "value1\n"
+        "--{0}\n"
+        "Content-Disposition: form-data; name=\"field2\"\n\n"
+        "value2\n"
+        "--{0}--".format(boundary).encode()
+    )
+
+    form = multipart.decode(headers, content)
+
+    assert len(form) == 2
+    assert form[0] == (b"field1", b"value1")
+    assert form[1] == (b"field2", b"value2")
--- a/test/netlib/test_utils.py
+++ b/test/netlib/test_utils.py
@ -1,7 +1,6 @@
 # coding=utf-8

 from netlib import utils, tutils
-from netlib.http import Headers


 def test_bidi():
@ -38,37 +37,6 @@ def test_pretty_size():
    assert utils.pretty_size(1024 * 1024) == "1MB"


-def test_multipartdecode():
-    boundary = 'somefancyboundary'
-    headers = Headers(
-        content_type='multipart/form-data; boundary=' + boundary
-    )
-    content = (
-        "--{0}\n"
-        "Content-Disposition: form-data; name=\"field1\"\n\n"
-        "value1\n"
-        "--{0}\n"
-        "Content-Disposition: form-data; name=\"field2\"\n\n"
-        "value2\n"
-        "--{0}--".format(boundary).encode()
-    )
-
-    form = utils.multipartdecode(headers, content)
-
-    assert len(form) == 2
-    assert form[0] == (b"field1", b"value1")
-    assert form[1] == (b"field2", b"value2")
-
-
-def test_parse_content_type():
-    p = utils.parse_content_type
-    assert p("text/html") == ("text", "html", {})
-    assert p("text") is None
-
-    v = p("text/html; charset=UTF-8")
-    assert v == ('text', 'html', {'charset': 'UTF-8'})
-
-
 def test_safe_subn():
    assert utils.safe_subn("foo", u"bar", "\xc2foo")