remove content caching in netlib.http.Message

This commit is contained in:
Maximilian Hils 2016-07-23 18:47:35 -07:00
parent d6209fa03b
commit e07f515a20
2 changed files with 31 additions and 117 deletions

View File

@ -49,23 +49,7 @@ class MessageData(basetypes.Serializable):
return cls(**state) return cls(**state)
class CachedDecode(object):
__slots__ = ["encoded", "encoding", "strict", "decoded"]
def __init__(self, object, encoding, strict, decoded):
self.encoded = object
self.encoding = encoding
self.strict = strict
self.decoded = decoded
no_cached_decode = CachedDecode(None, None, None, None)
class Message(basetypes.Serializable): class Message(basetypes.Serializable):
def __init__(self):
self._content_cache = no_cached_decode # type: CachedDecode
self._text_cache = no_cached_decode # type: CachedDecode
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, Message): if isinstance(other, Message):
return self.data == other.data return self.data == other.data
@ -126,25 +110,15 @@ class Message(basetypes.Serializable):
if self.raw_content is None: if self.raw_content is None:
return None return None
ce = self.headers.get("content-encoding") ce = self.headers.get("content-encoding")
cached = ( if ce:
self._content_cache.encoded == self.raw_content and try:
(self._content_cache.strict or not strict) and return encoding.decode(self.raw_content, ce)
self._content_cache.encoding == ce except ValueError:
) if strict:
if not cached: raise
is_strict = True return self.raw_content
if ce: else:
try: return self.raw_content
decoded = encoding.decode(self.raw_content, ce)
except ValueError:
if strict:
raise
is_strict = False
decoded = self.raw_content
else:
decoded = self.raw_content
self._content_cache = CachedDecode(self.raw_content, ce, is_strict, decoded)
return self._content_cache.decoded
def set_content(self, value): def set_content(self, value):
if value is None: if value is None:
@ -157,22 +131,13 @@ class Message(basetypes.Serializable):
.format(type(value).__name__) .format(type(value).__name__)
) )
ce = self.headers.get("content-encoding") ce = self.headers.get("content-encoding")
cached = ( try:
self._content_cache.decoded == value and self.raw_content = encoding.encode(value, ce or "identity")
self._content_cache.encoding == ce and except ValueError:
self._content_cache.strict # So we have an invalid content-encoding?
) # Let's remove it!
if not cached: del self.headers["content-encoding"]
try: self.raw_content = value
encoded = encoding.encode(value, ce or "identity")
except ValueError:
# So we have an invalid content-encoding?
# Let's remove it!
del self.headers["content-encoding"]
ce = None
encoded = value
self._content_cache = CachedDecode(encoded, ce, True, value)
self.raw_content = self._content_cache.encoded
self.headers["content-length"] = str(len(self.raw_content)) self.headers["content-length"] = str(len(self.raw_content))
content = property(get_content, set_content) content = property(get_content, set_content)
@ -244,22 +209,12 @@ class Message(basetypes.Serializable):
enc = self._guess_encoding() enc = self._guess_encoding()
content = self.get_content(strict) content = self.get_content(strict)
cached = ( try:
self._text_cache.encoded == content and return encoding.decode(content, enc)
(self._text_cache.strict or not strict) and except ValueError:
self._text_cache.encoding == enc if strict:
) raise
if not cached: return content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
is_strict = self._content_cache.strict
try:
decoded = encoding.decode(content, enc)
except ValueError:
if strict:
raise
is_strict = False
decoded = self.content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
self._text_cache = CachedDecode(content, enc, is_strict, decoded)
return self._text_cache.decoded
def set_text(self, text): def set_text(self, text):
if text is None: if text is None:
@ -267,23 +222,15 @@ class Message(basetypes.Serializable):
return return
enc = self._guess_encoding() enc = self._guess_encoding()
cached = ( try:
self._text_cache.decoded == text and self.content = encoding.encode(text, enc)
self._text_cache.encoding == enc and except ValueError:
self._text_cache.strict # Fall back to UTF-8 and update the content-type header.
) ct = headers.parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {})
if not cached: ct[2]["charset"] = "utf-8"
try: self.headers["content-type"] = headers.assemble_content_type(*ct)
encoded = encoding.encode(text, enc) enc = "utf8"
except ValueError: self.content = text.encode(enc, "replace" if six.PY2 else "surrogateescape")
# Fall back to UTF-8 and update the content-type header.
ct = headers.parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {})
ct[2]["charset"] = "utf-8"
self.headers["content-type"] = headers.assemble_content_type(*ct)
enc = "utf8"
encoded = text.encode(enc, "replace" if six.PY2 else "surrogateescape")
self._text_cache = CachedDecode(encoded, enc, True, text)
self.content = self._text_cache.encoded
text = property(get_text, set_text) text = property(get_text, set_text)

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import mock
import six import six
from netlib.tutils import tresp from netlib.tutils import tresp
@ -113,14 +112,6 @@ class TestMessageContentEncoding(object):
assert r.content == b"message" assert r.content == b"message"
assert r.raw_content != b"message" assert r.raw_content != b"message"
r.raw_content = b"foo"
with mock.patch("netlib.encoding.decode") as e:
assert r.content
assert e.call_count == 1
e.reset_mock()
assert r.content
assert e.call_count == 0
def test_modify(self): def test_modify(self):
r = tresp() r = tresp()
assert "content-encoding" not in r.headers assert "content-encoding" not in r.headers
@ -131,13 +122,6 @@ class TestMessageContentEncoding(object):
r.decode() r.decode()
assert r.raw_content == b"foo" assert r.raw_content == b"foo"
r.encode("identity")
with mock.patch("netlib.encoding.encode") as e:
r.content = b"foo"
assert e.call_count == 0
r.content = b"bar"
assert e.call_count == 1
with tutils.raises(TypeError): with tutils.raises(TypeError):
r.content = u"foo" r.content = u"foo"
@ -212,15 +196,6 @@ class TestMessageText(object):
r.headers["content-type"] = "text/html; charset=utf8" r.headers["content-type"] = "text/html; charset=utf8"
assert r.text == u"ü" assert r.text == u"ü"
r.encode("identity")
r.raw_content = b"foo"
with mock.patch("netlib.encoding.decode") as e:
assert r.text
assert e.call_count == 2
e.reset_mock()
assert r.text
assert e.call_count == 0
def test_guess_json(self): def test_guess_json(self):
r = tresp(content=b'"\xc3\xbc"') r = tresp(content=b'"\xc3\xbc"')
r.headers["content-type"] = "application/json" r.headers["content-type"] = "application/json"
@ -245,14 +220,6 @@ class TestMessageText(object):
assert r.raw_content == b"\xc3\xbc" assert r.raw_content == b"\xc3\xbc"
assert r.headers["content-length"] == "2" assert r.headers["content-length"] == "2"
r.encode("identity")
with mock.patch("netlib.encoding.encode") as e:
e.return_value = b""
r.text = u"ü"
assert e.call_count == 0
r.text = u"ä"
assert e.call_count == 2
def test_unknown_ce(self): def test_unknown_ce(self):
r = tresp() r = tresp()
r.headers["content-type"] = "text/html; charset=wtf" r.headers["content-type"] = "text/html; charset=wtf"