""" Unicode Handling ---------------- See also: http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/ """ from __future__ import absolute_import, print_function, division import copy try: from collections.abc import MutableMapping except ImportError: # pragma: nocover from collections import MutableMapping # Workaround for Python < 3.3 import six from netlib.utils import always_byte_args, always_bytes if six.PY2: # pragma: nocover _native = lambda x: x _always_bytes = lambda x: x _always_byte_args = lambda x: x else: # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded. _native = lambda x: x.decode("utf-8", "surrogateescape") _always_bytes = lambda x: always_bytes(x, "utf-8", "surrogateescape") _always_byte_args = always_byte_args("utf-8", "surrogateescape") class Headers(MutableMapping): """ Header class which allows both convenient access to individual headers as well as direct access to the underlying raw data. Provides a full dictionary interface. Example: .. code-block:: python # Create headers with keyword arguments >>> h = Headers(host="example.com", content_type="application/xml") # Headers mostly behave like a normal dict. >>> h["Host"] "example.com" # HTTP Headers are case insensitive >>> h["host"] "example.com" # Headers can also be creatd from a list of raw (header_name, header_value) byte tuples >>> h = Headers([ [b"Host",b"example.com"], [b"Accept",b"text/html"], [b"accept",b"application/xml"] ]) # Multiple headers are folded into a single header as per RFC7230 >>> h["Accept"] "text/html, application/xml" # Setting a header removes all existing headers with the same name. >>> h["Accept"] = "application/text" >>> h["Accept"] "application/text" # bytes(h) returns a HTTP1 header block. >>> print(bytes(h)) Host: example.com Accept: application/text # For full control, the raw header fields can be accessed >>> h.fields Caveats: For use with the "Set-Cookie" header, see :py:meth:`get_all`. """ @_always_byte_args def __init__(self, fields=None, **headers): """ Args: fields: (optional) list of ``(name, value)`` header byte tuples, e.g. ``[(b"Host", b"example.com")]``. All names and values must be bytes. **headers: Additional headers to set. Will overwrite existing values from `fields`. For convenience, underscores in header names will be transformed to dashes - this behaviour does not extend to other methods. If ``**headers`` contains multiple keys that have equal ``.lower()`` s, the behavior is undefined. """ self.fields = fields or [] for name, value in self.fields: if not isinstance(name, bytes) or not isinstance(value, bytes): raise ValueError("Headers passed as fields must be bytes.") # content_type -> content-type headers = { _always_bytes(name).replace(b"_", b"-"): value for name, value in six.iteritems(headers) } self.update(headers) def __bytes__(self): if self.fields: return b"\r\n".join(b": ".join(field) for field in self.fields) + b"\r\n" else: return b"" if six.PY2: # pragma: nocover __str__ = __bytes__ @_always_byte_args def __getitem__(self, name): values = self.get_all(name) if not values: raise KeyError(name) return ", ".join(values) @_always_byte_args def __setitem__(self, name, value): idx = self._index(name) # To please the human eye, we insert at the same position the first existing header occured. if idx is not None: del self[name] self.fields.insert(idx, [name, value]) else: self.fields.append([name, value]) @_always_byte_args def __delitem__(self, name): if name not in self: raise KeyError(name) name = name.lower() self.fields = [ field for field in self.fields if name != field[0].lower() ] def __iter__(self): seen = set() for name, _ in self.fields: name_lower = name.lower() if name_lower not in seen: seen.add(name_lower) yield _native(name) def __len__(self): return len(set(name.lower() for name, _ in self.fields)) # __hash__ = object.__hash__ def _index(self, name): name = name.lower() for i, field in enumerate(self.fields): if field[0].lower() == name: return i return None def __eq__(self, other): if isinstance(other, Headers): return self.fields == other.fields return False def __ne__(self, other): return not self.__eq__(other) @_always_byte_args def get_all(self, name): """ Like :py:meth:`get`, but does not fold multiple headers into a single one. This is useful for Set-Cookie headers, which do not support folding. See also: https://tools.ietf.org/html/rfc7230#section-3.2.2 """ name_lower = name.lower() values = [_native(value) for n, value in self.fields if n.lower() == name_lower] return values @_always_byte_args def set_all(self, name, values): """ Explicitly set multiple headers for the given key. See: :py:meth:`get_all` """ values = map(_always_bytes, values) # _always_byte_args does not fix lists if name in self: del self[name] self.fields.extend( [name, value] for value in values ) def copy(self): return Headers(copy.copy(self.fields)) # Implement the StateObject protocol from mitmproxy def get_state(self, short=False): return tuple(tuple(field) for field in self.fields) def load_state(self, state): self.fields = [list(field) for field in state] @classmethod def from_state(cls, state): return cls([list(field) for field in state])