remove content caching in netlib.http.Message

This commit is contained in:
Maximilian Hils 2016-07-23 18:47:35 -07:00
parent d6209fa03b
commit e07f515a20
2 changed files with 31 additions and 117 deletions

View File

@ -49,23 +49,7 @@ class MessageData(basetypes.Serializable):
return cls(**state) return cls(**state)
class CachedDecode(object):
    """
    Plain record with four fields: an encoded value, the encoding name,
    a strictness flag, and the decoded value.

    Instances only carry the attributes listed in ``__slots__``.
    """
    __slots__ = ["encoded", "encoding", "strict", "decoded"]

    def __init__(self, object, encoding, strict, decoded):
        # NOTE: the first parameter is named ``object`` and therefore shadows
        # the builtin inside this method; it is stored as ``encoded``.
        self.encoded = object
        self.encoding = encoding
        self.strict = strict
        self.decoded = decoded


# Placeholder entry whose four fields are all None.
no_cached_decode = CachedDecode(None, None, None, None)
class Message(basetypes.Serializable): class Message(basetypes.Serializable):
def __init__(self):
self._content_cache = no_cached_decode # type: CachedDecode
self._text_cache = no_cached_decode # type: CachedDecode
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, Message): if isinstance(other, Message):
return self.data == other.data return self.data == other.data
@ -126,25 +110,15 @@ class Message(basetypes.Serializable):
if self.raw_content is None: if self.raw_content is None:
return None return None
ce = self.headers.get("content-encoding") ce = self.headers.get("content-encoding")
cached = (
self._content_cache.encoded == self.raw_content and
(self._content_cache.strict or not strict) and
self._content_cache.encoding == ce
)
if not cached:
is_strict = True
if ce: if ce:
try: try:
decoded = encoding.decode(self.raw_content, ce) return encoding.decode(self.raw_content, ce)
except ValueError: except ValueError:
if strict: if strict:
raise raise
is_strict = False return self.raw_content
decoded = self.raw_content
else: else:
decoded = self.raw_content return self.raw_content
self._content_cache = CachedDecode(self.raw_content, ce, is_strict, decoded)
return self._content_cache.decoded
def set_content(self, value): def set_content(self, value):
if value is None: if value is None:
@ -157,22 +131,13 @@ class Message(basetypes.Serializable):
.format(type(value).__name__) .format(type(value).__name__)
) )
ce = self.headers.get("content-encoding") ce = self.headers.get("content-encoding")
cached = (
self._content_cache.decoded == value and
self._content_cache.encoding == ce and
self._content_cache.strict
)
if not cached:
try: try:
encoded = encoding.encode(value, ce or "identity") self.raw_content = encoding.encode(value, ce or "identity")
except ValueError: except ValueError:
# So we have an invalid content-encoding? # So we have an invalid content-encoding?
# Let's remove it! # Let's remove it!
del self.headers["content-encoding"] del self.headers["content-encoding"]
ce = None self.raw_content = value
encoded = value
self._content_cache = CachedDecode(encoded, ce, True, value)
self.raw_content = self._content_cache.encoded
self.headers["content-length"] = str(len(self.raw_content)) self.headers["content-length"] = str(len(self.raw_content))
content = property(get_content, set_content) content = property(get_content, set_content)
@ -244,22 +209,12 @@ class Message(basetypes.Serializable):
enc = self._guess_encoding() enc = self._guess_encoding()
content = self.get_content(strict) content = self.get_content(strict)
cached = (
self._text_cache.encoded == content and
(self._text_cache.strict or not strict) and
self._text_cache.encoding == enc
)
if not cached:
is_strict = self._content_cache.strict
try: try:
decoded = encoding.decode(content, enc) return encoding.decode(content, enc)
except ValueError: except ValueError:
if strict: if strict:
raise raise
is_strict = False return content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
decoded = self.content.decode("utf8", "replace" if six.PY2 else "surrogateescape")
self._text_cache = CachedDecode(content, enc, is_strict, decoded)
return self._text_cache.decoded
def set_text(self, text): def set_text(self, text):
if text is None: if text is None:
@ -267,23 +222,15 @@ class Message(basetypes.Serializable):
return return
enc = self._guess_encoding() enc = self._guess_encoding()
cached = (
self._text_cache.decoded == text and
self._text_cache.encoding == enc and
self._text_cache.strict
)
if not cached:
try: try:
encoded = encoding.encode(text, enc) self.content = encoding.encode(text, enc)
except ValueError: except ValueError:
# Fall back to UTF-8 and update the content-type header. # Fall back to UTF-8 and update the content-type header.
ct = headers.parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {}) ct = headers.parse_content_type(self.headers.get("content-type", "")) or ("text", "plain", {})
ct[2]["charset"] = "utf-8" ct[2]["charset"] = "utf-8"
self.headers["content-type"] = headers.assemble_content_type(*ct) self.headers["content-type"] = headers.assemble_content_type(*ct)
enc = "utf8" enc = "utf8"
encoded = text.encode(enc, "replace" if six.PY2 else "surrogateescape") self.content = text.encode(enc, "replace" if six.PY2 else "surrogateescape")
self._text_cache = CachedDecode(encoded, enc, True, text)
self.content = self._text_cache.encoded
text = property(get_text, set_text) text = property(get_text, set_text)

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import mock
import six import six
from netlib.tutils import tresp from netlib.tutils import tresp
@ -113,14 +112,6 @@ class TestMessageContentEncoding(object):
assert r.content == b"message" assert r.content == b"message"
assert r.raw_content != b"message" assert r.raw_content != b"message"
r.raw_content = b"foo"
with mock.patch("netlib.encoding.decode") as e:
assert r.content
assert e.call_count == 1
e.reset_mock()
assert r.content
assert e.call_count == 0
def test_modify(self): def test_modify(self):
r = tresp() r = tresp()
assert "content-encoding" not in r.headers assert "content-encoding" not in r.headers
@ -131,13 +122,6 @@ class TestMessageContentEncoding(object):
r.decode() r.decode()
assert r.raw_content == b"foo" assert r.raw_content == b"foo"
r.encode("identity")
with mock.patch("netlib.encoding.encode") as e:
r.content = b"foo"
assert e.call_count == 0
r.content = b"bar"
assert e.call_count == 1
with tutils.raises(TypeError): with tutils.raises(TypeError):
r.content = u"foo" r.content = u"foo"
@ -212,15 +196,6 @@ class TestMessageText(object):
r.headers["content-type"] = "text/html; charset=utf8" r.headers["content-type"] = "text/html; charset=utf8"
assert r.text == u"ü" assert r.text == u"ü"
r.encode("identity")
r.raw_content = b"foo"
with mock.patch("netlib.encoding.decode") as e:
assert r.text
assert e.call_count == 2
e.reset_mock()
assert r.text
assert e.call_count == 0
def test_guess_json(self): def test_guess_json(self):
r = tresp(content=b'"\xc3\xbc"') r = tresp(content=b'"\xc3\xbc"')
r.headers["content-type"] = "application/json" r.headers["content-type"] = "application/json"
@ -245,14 +220,6 @@ class TestMessageText(object):
assert r.raw_content == b"\xc3\xbc" assert r.raw_content == b"\xc3\xbc"
assert r.headers["content-length"] == "2" assert r.headers["content-length"] == "2"
r.encode("identity")
with mock.patch("netlib.encoding.encode") as e:
e.return_value = b""
r.text = u"ü"
assert e.call_count == 0
r.text = u"ä"
assert e.call_count == 2
def test_unknown_ce(self): def test_unknown_ce(self):
r = tresp() r = tresp()
r.headers["content-type"] = "text/html; charset=wtf" r.headers["content-type"] = "text/html; charset=wtf"