mitmproxy/netlib/http/headers.py

from __future__ import absolute_import, print_function, division

import re

import six
from netlib import multidict
from netlib import strutils

# See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/

if six.PY2:  # pragma: no cover
    # Python 2 branch: both helpers are identity functions and hand back
    # their argument untouched.
    def _native(x):
        """Return ``x`` unchanged (Python 2 branch)."""
        return x

    def _always_bytes(x):
        """Return ``x`` unchanged (Python 2 branch)."""
        return x
else:
    # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
    def _native(x):
        """
        Decode ``x`` to text using UTF-8 with the ``surrogateescape`` error handler.

        ``surrogateescape`` maps undecodable bytes to lone surrogate code points
        instead of raising, so arbitrary header bytes can be decoded.
        """
        return x.decode("utf-8", "surrogateescape")

    def _always_bytes(x):
        """
        Convert ``x`` via ``strutils.always_bytes`` using UTF-8 and ``surrogateescape``.

        NOTE(review): presumably this encodes text and passes bytes through
        unchanged - confirm against ``netlib.strutils.always_bytes``.
        """
        return strutils.always_bytes(x, "utf-8", "surrogateescape")


class Headers(multidict.MultiDict):
    """
    HTTP header container built on top of :py:class:`multidict.MultiDict`.

    Header names and values are kept as raw bytes in ``.fields``; the methods
    defined here convert keys/values with the module-level ``_always_bytes`` and
    ``_native`` helpers on the way in and out. Lookups are case-insensitive and
    repeated headers are joined with ``", "`` when read through the dict interface.

    Example:

    .. code-block:: python

        # Keyword arguments create headers (underscores become dashes)
        >>> h = Headers(host="example.com", content_type="application/xml")

        # Access works like a regular dict...
        >>> h["Host"]
        "example.com"

        # ...but header names are matched case-insensitively
        >>> h["host"]
        "example.com"

        # Alternatively, pass a list of raw (header_name, header_value) byte tuples
        >>> h = Headers([
            (b"Host",b"example.com"),
            (b"Accept",b"text/html"),
            (b"accept",b"application/xml")
        ])

        # Repeated headers are folded into one value as per RFC7230
        >>> h["Accept"]
        "text/html, application/xml"

        # Assigning a header replaces every existing header of that name.
        >>> h["Accept"] = "application/text"
        >>> h["Accept"]
        "application/text"

        # bytes(h) produces an HTTP/1 header block.
        >>> print(bytes(h))
        Host: example.com
        Accept: application/text

        # The raw header fields are available for full control
        >>> h.fields

    Caveats:
        For use with the "Set-Cookie" header, see :py:meth:`get_all`.
    """

    def __init__(self, fields=(), **headers):
        """
        Args:
            fields: (optional) list of ``(name, value)`` header byte tuples,
                e.g. ``[(b"Host", b"example.com")]``. Every name and value has
                to be bytes.
            **headers: Extra headers, applied after `fields` and overwriting
                same-named entries from it. As a convenience, underscores in the
                keyword names are turned into dashes - other methods do not do
                this. If ``**headers`` contains several keys whose ``.lower()``
                is equal, the behavior is undefined.

        Raises:
            TypeError: if any name or value in the stored fields is not bytes.
        """
        super(Headers, self).__init__(fields)

        has_non_bytes = any(
            not (isinstance(key, bytes) and isinstance(value, bytes))
            for key, value in self.fields
        )
        if has_non_bytes:
            raise TypeError("Header fields must be bytes.")

        # content_type -> content-type
        converted = {}
        for raw_name, raw_value in six.iteritems(headers):
            header_name = _always_bytes(raw_name).replace(b"_", b"-")
            converted[header_name] = _always_bytes(raw_value)
        self.update(converted)

    @staticmethod
    def _reduce_values(values):
        # Several values for one header name are folded into a single string.
        separator = ", "
        return separator.join(values)

    @staticmethod
    def _kconv(key):
        # Header names compare case-insensitively, so normalise to lower case.
        return key.lower()

    def __bytes__(self):
        # No fields: empty block, without a trailing CRLF.
        if not self.fields:
            return b""
        lines = [b": ".join(field) for field in self.fields]
        return b"\r\n".join(lines) + b"\r\n"

    if six.PY2:  # pragma: no cover
        __str__ = __bytes__

    def __delitem__(self, key):
        raw_key = _always_bytes(key)
        super(Headers, self).__delitem__(raw_key)

    def __iter__(self):
        for raw_key in super(Headers, self).__iter__():
            yield _native(raw_key)

    def get_all(self, name):
        """
        Return every value stored for ``name`` as a list, without folding them
        into a single string the way :py:meth:`get` does.
        Needed for Set-Cookie headers, which do not support folding.
        See also: https://tools.ietf.org/html/rfc7230#section-3.2.2
        """
        raw_name = _always_bytes(name)
        raw_values = super(Headers, self).get_all(raw_name)
        return [_native(raw_value) for raw_value in raw_values]

    def set_all(self, name, values):
        """
        Explicitly store several headers under the given key.
        See: :py:meth:`get_all`
        """
        raw_name = _always_bytes(name)
        raw_values = [_always_bytes(item) for item in values]
        return super(Headers, self).set_all(raw_name, raw_values)

    def insert(self, index, key, value):
        raw_key = _always_bytes(key)
        raw_value = _always_bytes(value)
        super(Headers, self).insert(index, raw_key, raw_value)

    def items(self, multi=False):
        if not multi:
            return super(Headers, self).items()
        # One (name, value) pair per raw field, converted with _native.
        return (
            (_native(raw_key), _native(raw_value))
            for raw_key, raw_value in self.fields
        )

    def replace(self, pattern, repl, flags=0):
        """
        Apply a regular expression substitution to every header, treating each
        one as a "name: value" line.

        Returns:
            The number of replacements made.
        """
        if isinstance(pattern, six.text_type):
            pattern = strutils.escaped_str_to_bytes(pattern)
        if isinstance(repl, six.text_type):
            repl = strutils.escaped_str_to_bytes(repl)
        regex = re.compile(pattern, flags)

        total = 0
        updated = []
        for name, value in self.fields:
            line, count = regex.subn(repl, name + b": " + value)
            pieces = line.split(b": ", 1)
            # A single piece means the substitution removed the ": " separator.
            # There is nothing sensible to split, so the header is kept as-is
            # and its substitutions are not counted.
            if len(pieces) == 2:
                name, value = pieces
                total += count
            updated.append([name, value])
        self.fields = updated
        return total


def parse_content_type(c):
    """
    A simple parser for content-type values.

    Args:
        c: The content-type header value, e.g. ``text/html; charset=UTF-8``.

    Returns:
        A ``(type, subtype, parameters)`` tuple, where type and subtype are
        lower-cased strings with surrounding whitespace removed, and parameters
        is a dict mapping stripped parameter names to stripped values.
        ``None`` if the value contains no ``/`` before the first ``;``.

    E.g. the following string:

        text/html; charset=UTF-8

    Returns:

        ("text", "html", {"charset": "UTF-8"})
    """
    # Everything before the first ";" is the media type, the rest are parameters.
    media_type, _, params = c.partition(";")
    ts = media_type.split("/", 1)
    if len(ts) != 2:
        return None
    d = {}
    # Without a ";" params is empty and yields one clause lacking "=",
    # which is skipped below.
    for clause in params.split(";"):
        name, sep, value = clause.partition("=")
        if sep:
            d[name.strip()] = value.strip()
    # Whitespace around ";" is permitted, so strip it: otherwise
    # "text/html ; charset=x" would report the subtype as "html ".
    return ts[0].strip().lower(), ts[1].strip().lower(), d
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`from __future__ import absolute_import, print_function, division`
add Serializeable.copy 2016-04-02 11:50:53 +00:00
improve .replace() and move it into netlib 2016-04-02 12:38:33 +00:00			`import re`

Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`import six`
Reorganise netlib imports according to Google Style Guide 2016-05-31 23:12:10 +00:00			`from netlib import multidict`
Utils reorganisation: add netlib.strutils Extract a number of string and format-related functions to netlib.strutils. 2016-06-02 00:31:41 +00:00			`from netlib import strutils`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00
add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`# See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00
s/nocover/no cover/g according to coveralls docs 2016-03-27 10:02:41 +00:00			`if six.PY2: # pragma: no cover`
netlib: fix most flake8 offenses 2016-05-28 20:17:02 +00:00			`def _native(x):`
			`return x`

			`def _always_bytes(x):`
			`return x`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`else:`
			`# While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.`
netlib: fix most flake8 offenses 2016-05-28 20:17:02 +00:00			`def _native(x):`
			`return x.decode("utf-8", "surrogateescape")`

			`def _always_bytes(x):`
Utils reorganisation: add netlib.strutils Extract a number of string and format-related functions to netlib.strutils. 2016-06-02 00:31:41 +00:00			`return strutils.always_bytes(x, "utf-8", "surrogateescape")`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00

Reorganise netlib imports according to Google Style Guide 2016-05-31 23:12:10 +00:00			`class Headers(multidict.MultiDict):`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`"""`
			`Header class which allows both convenient access to individual headers as well as`
			`direct access to the underlying raw data. Provides a full dictionary interface.`

			`Example:`

			`.. code-block:: python`

refactor response model 2015-09-26 15:39:50 +00:00			`# Create headers with keyword arguments`
			`>>> h = Headers(host="example.com", content_type="application/xml")`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00
			`# Headers mostly behave like a normal dict.`
			`>>> h["Host"]`
			`"example.com"`

			`# HTTP Headers are case insensitive`
			`>>> h["host"]`
			`"example.com"`

add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`# Headers can also be created from a list of raw (header_name, header_value) byte tuples`
refactor response model 2015-09-26 15:39:50 +00:00			`>>> h = Headers([`
fix Header docs 2016-05-20 16:37:13 +00:00			`(b"Host",b"example.com"),`
			`(b"Accept",b"text/html"),`
			`(b"accept",b"application/xml")`
refactor response model 2015-09-26 15:39:50 +00:00			`])`

Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`# Multiple headers are folded into a single header as per RFC7230`
			`>>> h["Accept"]`
			`"text/html, application/xml"`

			`# Setting a header removes all existing headers with the same name.`
			`>>> h["Accept"] = "application/text"`
			`>>> h["Accept"]`
			`"application/text"`

refactor response model 2015-09-26 15:39:50 +00:00			`# bytes(h) returns a HTTP1 header block.`
			`>>> print(bytes(h))`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`Host: example.com`
			`Accept: application/text`

			`# For full control, the raw header fields can be accessed`
			`>>> h.fields`

			`Caveats:`
			For use with the "Set-Cookie" header, see :py:meth:`get_all`.
			`"""`

fix tests 2016-05-29 02:31:43 +00:00			`def __init__(self, fields=(), **headers):`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`"""`
			`Args:`
refactor response model 2015-09-26 15:39:50 +00:00			fields: (optional) list of ``(name, value)`` header byte tuples,
			e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes.
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			**headers: Additional headers to set. Will overwrite existing values from `fields`.
			`For convenience, underscores in header names will be transformed to dashes -`
			`this behaviour does not extend to other methods.`
			If ``**headers`` contains multiple keys that have equal ``.lower()`` s,
			`the behavior is undefined.`
			`"""`
add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`super(Headers, self).__init__(fields)`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00
improve MultiDict, add ImmutableMultiDict, adjust response.cookies 2016-05-19 05:50:19 +00:00			`for key, value in self.fields:`
			`if not isinstance(key, bytes) or not isinstance(value, bytes):`
			`raise TypeError("Header fields must be bytes.")`

Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`# content_type -> content-type`
			`headers = {`
add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`_always_bytes(name).replace(b"_", b"-"): _always_bytes(value)`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`for name, value in six.iteritems(headers)`
more style cleanup 2016-05-29 11:33:20 +00:00			`}`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`self.update(headers)`

add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`@staticmethod`
			`def _reduce_values(values):`
			`# Headers can be folded`
			`return ", ".join(values)`

			`@staticmethod`
			`def _kconv(key):`
			`# Headers are case-insensitive`
			`return key.lower()`

Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`def __bytes__(self):`
			`if self.fields:`
			`return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n"`
			`else:`
			`return b""`

s/nocover/no cover/g according to coveralls docs 2016-03-27 10:02:41 +00:00			`if six.PY2: # pragma: no cover`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`__str__ = __bytes__`

add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`def __delitem__(self, key):`
			`key = _always_bytes(key)`
			`super(Headers, self).__delitem__(key)`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00
			`def __iter__(self):`
add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`for x in super(Headers, self).__iter__():`
			`yield _native(x)`

Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00			`def get_all(self, name):`
			`"""`
			Like :py:meth:`get`, but does not fold multiple headers into a single one.
			`This is useful for Set-Cookie headers, which do not support folding.`
			`See also: https://tools.ietf.org/html/rfc7230#section-3.2.2`
			`"""`
add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`name = _always_bytes(name)`
			`return [`
			`_native(x) for x in`
			`super(Headers, self).get_all(name)`
			`]`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00
			`def set_all(self, name, values):`
			`"""`
			`Explicitly set multiple headers for the given key.`
			See: :py:meth:`get_all`
			`"""`
add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`name = _always_bytes(name)`
			`values = [_always_bytes(x) for x in values]`
			`return super(Headers, self).set_all(name, values)`
Headers: return str on all Python versions 2015-09-21 23:48:35 +00:00
add MultiDict This commit introduces MultiDict, a multi-dictionary similar to ODict, but with improved semantics (as in the Headers class). MultiDict fixes a few issues that were present in the Request/Response API. In particular, `request.cookies["foo"] = "bar"` has previously been a no-op, as the cookies property returned a mutable _copy_ of the cookies. 2016-05-19 01:46:42 +00:00			`def insert(self, index, key, value):`
			`key = _always_bytes(key)`
			`value = _always_bytes(value)`
			`super(Headers, self).insert(index, key, value)`
improve .replace() and move it into netlib 2016-04-02 12:38:33 +00:00
py3++, multidict fixes This commit improves Python 3 compatibility and fixes two multidict issues: 1. Headers.items(multi=True) now decodes fields 2. MultiDict.clear(item) has been removed, as Python's MutableMapping already defines .clear() with different semantics. This is confusing for everyone who expects a dict-like object. `.pop("attr", None)` is not fantastic, but it's the Python way to do it. 2016-07-07 02:50:06 +00:00			`def items(self, multi=False):`
			`if multi:`
			`return (`
			`(_native(k), _native(v))`
			`for k, v in self.fields`
			`)`
			`else:`
			`return super(Headers, self).items()`

improve .replace() and move it into netlib 2016-04-02 12:38:33 +00:00			`def replace(self, pattern, repl, flags=0):`
			`"""`
			`Replaces a regular expression pattern with repl in each "name: value"`
			`header line.`

			`Returns:`
			`The number of replacements made.`
			`"""`
py3++ 2016-07-01 21:10:48 +00:00			`if isinstance(pattern, six.text_type):`
			`pattern = strutils.escaped_str_to_bytes(pattern)`
			`if isinstance(repl, six.text_type):`
			`repl = strutils.escaped_str_to_bytes(repl)`
improve .replace() and move it into netlib 2016-04-02 12:38:33 +00:00			`pattern = re.compile(pattern, flags)`
			`replacements = 0`

			`fields = []`
			`for name, value in self.fields:`
			`line, n = pattern.subn(repl, name + b": " + value)`
			`try:`
			`name, value = line.split(b": ", 1)`
			`except ValueError:`
			`# We get a ValueError if the replacement removed the ": "`
			`# There's not much we can do about this, so we just keep the header as-is.`
			`pass`
			`else:`
			`replacements += n`
			`fields.append([name, value])`
			`self.fields = fields`
			`return replacements`
utils.multipartdecode -> http.multipart.decode also utils.parse_content_type -> http.headers.parse_content_type 2016-05-31 07:07:55 +00:00
Satisfy flake8 2016-05-31 07:58:28 +00:00
utils.multipartdecode -> http.multipart.decode also utils.parse_content_type -> http.headers.parse_content_type 2016-05-31 07:07:55 +00:00			`def parse_content_type(c):`
			`"""`
			`A simple parser for content-type values. Returns a (type, subtype,`
			`parameters) tuple, where type and subtype are strings, and parameters`
			`is a dict. If the string could not be parsed, return None.`

			`E.g. the following string:`

			`text/html; charset=UTF-8`

			`Returns:`

			`("text", "html", {"charset": "UTF-8"})`
			`"""`
			`parts = c.split(";", 1)`
			`ts = parts[0].split("/", 1)`
			`if len(ts) != 2:`
			`return None`
			`d = {}`
			`if len(parts) == 2:`
			`for i in parts[1].split(";"):`
			`clause = i.split("=", 1)`
			`if len(clause) == 2:`
			`d[clause[0].strip()] = clause[1].strip()`
			`return ts[0].lower(), ts[1].lower(), d`