2015-09-25 22:39:04 +00:00
|
|
|
from __future__ import absolute_import, print_function, division
|
|
|
|
|
2016-07-01 21:10:48 +00:00
|
|
|
import re
|
2015-09-25 22:39:04 +00:00
|
|
|
import warnings
|
|
|
|
|
|
|
|
import six
|
|
|
|
|
2016-06-02 00:31:41 +00:00
|
|
|
from netlib import encoding, strutils, basetypes
|
2016-05-31 23:12:10 +00:00
|
|
|
from netlib.http import headers
|
2015-09-25 22:39:04 +00:00
|
|
|
|
2016-03-27 10:02:41 +00:00
|
|
|
if six.PY2:  # pragma: no cover
    # On Python 2 both helpers hand their argument back untouched.
    def _native(x):
        """Return ``x`` unchanged."""
        return x

    def _always_bytes(x):
        """Return ``x`` unchanged."""
        return x
else:
    # While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
    def _native(x):
        """
        Decode ``x`` as UTF-8 with the ``surrogateescape`` error handler,
        so bytes that are not valid UTF-8 do not raise.
        """
        return x.decode("utf-8", "surrogateescape")

    def _always_bytes(x):
        """
        Pass ``x`` through ``strutils.always_bytes`` using UTF-8 and the
        ``surrogateescape`` error handler (the counterpart of ``_native``).
        """
        return strutils.always_bytes(x, "utf-8", "surrogateescape")
|
2015-09-25 22:39:04 +00:00
|
|
|
|
|
|
|
|
2016-05-31 05:16:31 +00:00
|
|
|
class MessageData(basetypes.Serializable):
    """
    Attribute container for the state of a message.

    Equality and hashing are based on the instance ``__dict__``. The
    ``headers`` attribute is converted to and from its own state
    representation when the object is (de)serialized.
    """

    def __eq__(self, other):
        """Two instances are equal when all of their attributes are equal."""
        if isinstance(other, MessageData):
            return self.__dict__ == other.__dict__
        return False

    def __ne__(self, other):
        """Inverse of ``__eq__`` (needed explicitly on Python 2)."""
        return not self.__eq__(other)

    def __hash__(self):
        """Hash over the (name, value) pairs of all instance attributes."""
        return hash(frozenset(self.__dict__.items()))

    def set_state(self, state):
        """
        Assign every item of ``state`` as an attribute on this instance.

        The ``"headers"`` entry is first rebuilt with
        ``headers.Headers.from_state``. ``state`` itself is not modified.
        """
        for k, v in state.items():
            if k == "headers":
                v = headers.Headers.from_state(v)
            setattr(self, k, v)

    def get_state(self):
        """
        Return a copy of the instance attributes in which ``"headers"`` has
        been replaced by the result of its own ``get_state()``.
        """
        state = vars(self).copy()
        state["headers"] = state["headers"].get_state()
        return state

    @classmethod
    def from_state(cls, state):
        """
        Build a new instance from ``state``.

        Args:
            state: mapping of constructor keyword arguments; its
                ``"headers"`` entry is rebuilt with
                ``headers.Headers.from_state``.

        Returns:
            ``cls(**state)`` with the rebuilt headers.
        """
        # Work on a shallow copy: writing the rebuilt Headers object back
        # into the caller's dict would make a second from_state() call with
        # the same dict receive an already-built Headers object.
        state = dict(state)
        state["headers"] = headers.Headers.from_state(state["headers"])
        return cls(**state)
|
|
|
|
|
2015-09-26 22:49:41 +00:00
|
|
|
|
2016-07-02 08:51:47 +00:00
|
|
|
class CachedDecode(object):
    """
    Record of a single decode/encode result: the encoded value, the name of
    the encoding that was applied, and the decoded value.
    """

    __slots__ = ["encoded", "encoding", "decoded"]

    def __init__(self, object, encoding, decoded):
        # ``object`` is the encoded form; the parameter name is part of the
        # constructor signature and is therefore left as it is.
        self.encoded, self.encoding, self.decoded = object, encoding, decoded


# Sentinel whose three fields are all None.
no_cached_decode = CachedDecode(None, None, None)
|
|
|
|
|
|
|
|
|
2016-05-31 05:16:31 +00:00
|
|
|
class Message(basetypes.Serializable):
    """
    Base class for HTTP messages.

    The actual state lives in ``self.data`` (which this class never assigns;
    it is expected to be provided elsewhere, e.g. by a subclass). This class
    layers three views of the body on top of it:

    - ``raw_content``: the body exactly as stored in ``data.content``
    - ``content``: the body with the ``content-encoding`` undone
    - ``text``: ``content`` decoded with the ``content-type`` charset

    Decoding results are memoised in ``_content_cache`` / ``_text_cache`` and
    reused as long as the input and the relevant header are unchanged.
    """

    def __init__(self):
        self._content_cache = no_cached_decode  # type: CachedDecode
        self._text_cache = no_cached_decode  # type: CachedDecode

    def __eq__(self, other):
        """Messages are equal when their ``data`` objects are equal."""
        if isinstance(other, Message):
            return self.data == other.data
        return False

    def __ne__(self, other):
        """Inverse of ``__eq__`` (needed explicitly on Python 2)."""
        return not self.__eq__(other)

    def __hash__(self):
        # XOR with 1 so that a message does not hash to the same value as
        # its own data object.
        return hash(self.data) ^ 1

    def get_state(self):
        """Return the serialized state of ``self.data``."""
        return self.data.get_state()

    def set_state(self, state):
        """Load ``state`` into ``self.data``."""
        self.data.set_state(state)

    @classmethod
    def from_state(cls, state):
        """
        Build a new message from ``state``.

        Args:
            state: mapping of constructor keyword arguments; its
                ``"headers"`` entry is rebuilt with
                ``headers.Headers.from_state``.

        Returns:
            ``cls(**state)`` with the rebuilt headers.
        """
        # Work on a shallow copy so the caller's dict is left untouched and
        # can safely be reused for another from_state() call.
        state = dict(state)
        state["headers"] = headers.Headers.from_state(state["headers"])
        return cls(**state)

    @property
    def headers(self):
        """
        Message headers object

        Returns:
            netlib.http.Headers
        """
        return self.data.headers

    @headers.setter
    def headers(self, h):
        self.data.headers = h

    @property
    def raw_content(self):
        # type: () -> bytes
        """
        The raw (encoded) HTTP message body

        See also: :py:attr:`content`, :py:attr:`text`
        """
        return self.data.content

    @raw_content.setter
    def raw_content(self, content):
        self.data.content = content

    @property
    def content(self):
        # type: () -> bytes
        """
        The HTTP message body decoded with the content-encoding header (e.g. gzip)

        If there is no content-encoding header, or decoding raises
        ``ValueError``, the raw body is returned unchanged.

        See also: :py:attr:`raw_content`, :py:attr:`text`
        """
        ce = self.headers.get("content-encoding")
        cached = (
            self._content_cache.encoded == self.raw_content and
            self._content_cache.encoding == ce
        )
        if not cached:
            try:
                if not ce:
                    # No encoding to undo: take the fallback path below.
                    raise ValueError()
                decoded = encoding.decode(self.raw_content, ce)
            except ValueError:
                decoded = self.raw_content
            self._content_cache = CachedDecode(self.raw_content, ce, decoded)
        return self._content_cache.decoded

    @content.setter
    def content(self, value):
        ce = self.headers.get("content-encoding")
        cached = (
            self._content_cache.decoded == value and
            self._content_cache.encoding == ce
        )
        if not cached:
            try:
                if not ce:
                    # Nothing to encode with: take the fallback path below.
                    raise ValueError()
                encoded = encoding.encode(value, ce)
            except ValueError:
                # Do we have an unknown content-encoding?
                # If so, we want to remove it.
                if value and ce:
                    self.headers.pop("content-encoding", None)
                    ce = None
                encoded = value
            self._content_cache = CachedDecode(encoded, ce, value)
        self.raw_content = self._content_cache.encoded
        # Keep content-length in sync, but only when there is a concrete
        # bytes body to measure.
        if isinstance(self.raw_content, bytes):
            self.headers["content-length"] = str(len(self.raw_content))

    @property
    def http_version(self):
        """
        Version string, e.g. "HTTP/1.1"
        """
        return _native(self.data.http_version)

    @http_version.setter
    def http_version(self, http_version):
        self.data.http_version = _always_bytes(http_version)

    @property
    def timestamp_start(self):
        """
        First byte timestamp
        """
        return self.data.timestamp_start

    @timestamp_start.setter
    def timestamp_start(self, timestamp_start):
        self.data.timestamp_start = timestamp_start

    @property
    def timestamp_end(self):
        """
        Last byte timestamp
        """
        return self.data.timestamp_end

    @timestamp_end.setter
    def timestamp_end(self, timestamp_end):
        self.data.timestamp_end = timestamp_end

    def _get_content_type_charset(self):
        # type: () -> Optional[str]
        """
        Return the ``charset`` parameter of the content-type header, or
        None when the header cannot be parsed or carries no charset.
        """
        ct = headers.parse_content_type(self.headers.get("content-type", ""))
        if ct:
            return ct[2].get("charset")

    @property
    def text(self):
        # type: () -> Optional[six.text_type]
        """
        The HTTP message body decoded with both content-encoding header (e.g. gzip)
        and content-type header charset.

        Returns None when the message has no body. If there is no charset,
        or decoding with it raises ``ValueError``, the body is decoded as
        UTF-8 with a lenient error handler instead.

        See also: :py:attr:`content`, :py:attr:`raw_content`
        """
        # This attribute should be called text, because that's what requests does.
        content = self.content
        if content is None:
            # There is nothing to decode; without this guard the UTF-8
            # fallback below would call .decode() on None.
            return None

        enc = self._get_content_type_charset()

        # We may also want to check for HTML meta tags here at some point.

        cached = (
            self._text_cache.encoded == content and
            self._text_cache.encoding == enc
        )
        if not cached:
            try:
                if not enc:
                    # No charset declared: take the fallback path below.
                    raise ValueError()
                decoded = encoding.decode(content, enc)
            except ValueError:
                decoded = content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
            self._text_cache = CachedDecode(content, enc, decoded)
        return self._text_cache.decoded

    @text.setter
    def text(self, text):
        if text is None:
            # Clearing the text clears the body; without this guard the
            # UTF-8 fallback below would call .encode() on None.
            self.content = None
            return

        enc = self._get_content_type_charset()
        cached = (
            self._text_cache.decoded == text and
            self._text_cache.encoding == enc
        )
        if not cached:
            try:
                if not enc:
                    # No charset declared: take the fallback path below.
                    raise ValueError()
                encoded = encoding.encode(text, enc)
            except ValueError:
                # Do we have an unknown content-type charset?
                # If so, we want to replace it with utf8.
                if text and enc:
                    self.headers["content-type"] = re.sub(
                        "charset=[^;]+",
                        "charset=utf-8",
                        self.headers["content-type"]
                    )
                encoded = text.encode("utf8", "replace" if six.PY2 else "surrogateescape")
            self._text_cache = CachedDecode(encoded, enc, text)
        self.content = self._text_cache.encoded

    def decode(self):
        """
        Decodes body based on the current Content-Encoding header, then
        removes the header. If there is no Content-Encoding header, no
        action is taken.
        """
        self.raw_content = self.content
        self.headers.pop("content-encoding", None)

    def encode(self, e):
        """
        Encodes body with the encoding e, where e is "gzip", "deflate" or "identity".
        """
        self.decode()  # remove the current encoding
        self.headers["content-encoding"] = e
        # Re-assigning through the content setter applies the new encoding.
        self.content = self.raw_content

    def replace(self, pattern, repl, flags=0):
        """
        Replaces a regular expression pattern with repl in both the headers
        and the body of the message. Encoded body will be decoded
        before replacement, and re-encoded afterwards.

        Returns:
            The number of replacements made.
        """
        # The body is bytes, so text patterns are converted to bytes first.
        if isinstance(pattern, six.text_type):
            pattern = strutils.escaped_str_to_bytes(pattern)
        if isinstance(repl, six.text_type):
            repl = strutils.escaped_str_to_bytes(repl)
        replacements = 0
        if self.content:
            self.content, replacements = re.subn(
                pattern, repl, self.content, flags=flags
            )
        replacements += self.headers.replace(pattern, repl, flags)
        return replacements

    # Legacy

    @property
    def body(self):  # pragma: no cover
        warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning)
        return self.content

    @body.setter
    def body(self, body):  # pragma: no cover
        warnings.warn(".body is deprecated, use .content instead.", DeprecationWarning)
        self.content = body
|
|
|
|
|
|
|
|
|
|
|
|
class decoded(object):
    """
    Deprecated no-op context manager, kept so that old call sites keep
    working.

    Use :py:attr:`content` for the decoded body and :py:attr:`raw_content`
    for the encoded body instead.
    """

    def __init__(self, message):  # pragma no cover
        # ``message`` is accepted but not used; constructing the object only
        # emits the deprecation warning.
        deprecation_notice = (
            "decoded() is deprecated, you can now directly use .content instead. "
            ".raw_content has the encoded content."
        )
        warnings.warn(deprecation_notice, DeprecationWarning)

    def __enter__(self):  # pragma no cover
        return None

    def __exit__(self, type, value, tb):  # pragma no cover
        # Returning None (falsy) lets exceptions from the with-body propagate.
        return None
|