"""
Utility functions for decoding response bodies.
"""
from __future__ import absolute_import

import codecs
import collections
import gzip
import zlib
from io import BytesIO

import brotli
import six

from typing import Union  # noqa


# We have a shared single-element cache for encoding and decoding.
# This is quite useful in practice, e.g.
# flow.request.content = flow.request.content.replace(b"foo", b"bar")
# does not require an .encode() call if content does not contain b"foo".
CachedDecode = collections.namedtuple("CachedDecode", "encoded encoding errors decoded")
_cache = CachedDecode(None, None, None, None)
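
# Example (illustrative): for some gzip-compressed bytes raw_body, decoding
# and then re-encoding the unchanged result is served from the cache:
#
#     text = decode(raw_body, "gzip")   # decompresses and primes _cache
#     body = encode(text, "gzip")       # cache hit: raw_body is returned as-is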


def decode(encoded, encoding, errors='strict'):
    # type: (Union[str, bytes], str, str) -> Union[str, bytes]
    """
    Decode the given input object using the given encoding.

    Returns:
        The decoded value

    Raises:
        ValueError, if decoding fails.
    """
    if len(encoded) == 0:
        return encoded

    global _cache
    cached = (
        isinstance(encoded, bytes) and
        _cache.encoded == encoded and
        _cache.encoding == encoding and
        _cache.errors == errors
    )
    if cached:
        return _cache.decoded
    try:
        try:
            decoded = custom_decode[encoding](encoded)
        except KeyError:
            decoded = codecs.decode(encoded, encoding, errors)
        # The cache is only used for the compression codecs.
        if encoding in ("gzip", "deflate", "br"):
            _cache = CachedDecode(encoded, encoding, errors, decoded)
        return decoded
    except TypeError:
        # Propagate TypeErrors (e.g. a wrong input type) unchanged.
        raise
    except Exception as e:
        raise ValueError("{} when decoding {} with {}: {}".format(
            type(e).__name__,
            repr(encoded)[:10],
            repr(encoding),
            repr(e),
        ))
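
# Example (illustrative): encodings that are not in custom_decode fall through
# to codecs.decode, so text and compression codecs share one entry point:
#
#     decode(b"caf\xc3\xa9", "utf8")             # -> u"café" via codecs
#     decode(encode(b"spam", "gzip"), "gzip")    # -> b"spam" via decode_gzip
#     decode(b"spam", "unknown-codec")           # -> ValueError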


def encode(decoded, encoding, errors='strict'):
    # type: (Union[str, bytes], str, str) -> Union[str, bytes]
    """
    Encode the given input object using the given encoding.

    Returns:
        The encoded value

    Raises:
        ValueError, if encoding fails.
    """
    if len(decoded) == 0:
        return decoded

    global _cache
    cached = (
        isinstance(decoded, bytes) and
        _cache.decoded == decoded and
        _cache.encoding == encoding and
        _cache.errors == errors
    )
    if cached:
        return _cache.encoded
    try:
        try:
            value = decoded
            # The custom codecs operate on bytes, so encode text first on
            # Python 3.
            if not six.PY2 and isinstance(value, six.string_types):
                value = decoded.encode()
            encoded = custom_encode[encoding](value)
        except KeyError:
            encoded = codecs.encode(decoded, encoding, errors)
        # The cache is only used for the compression codecs.
        if encoding in ("gzip", "deflate", "br"):
            _cache = CachedDecode(encoded, encoding, errors, decoded)
        return encoded
    except TypeError:
        # Propagate TypeErrors (e.g. a wrong input type) unchanged.
        raise
    except Exception as e:
        raise ValueError("{} when encoding {} with {}: {}".format(
            type(e).__name__,
            repr(decoded)[:10],
            repr(encoding),
            repr(e),
        ))
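
# Example (illustrative): on Python 3 a text value passed to one of the custom
# codecs is UTF-8 encoded first, so both of these produce gzip bytes:
#
#     encode(u"hello", "gzip")
#     encode(b"hello", "gzip")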


def identity(content):
    """
    Returns content unchanged. Identity is the default value of
    Accept-Encoding headers.
    """
    return content


def decode_gzip(content):
    # Wrap the compressed body in a file-like object for GzipFile.
    gfile = gzip.GzipFile(fileobj=BytesIO(content))
    return gfile.read()


def encode_gzip(content):
    s = BytesIO()
    gf = gzip.GzipFile(fileobj=s, mode='wb')
    gf.write(content)
    gf.close()
    return s.getvalue()


def decode_brotli(content):
    return brotli.decompress(content)


def encode_brotli(content):
    return brotli.compress(content)


def decode_deflate(content):
    """
    Returns decompressed data for DEFLATE. Some servers may respond with
    compressed data without a zlib header or checksum. An undocumented
    feature of zlib permits the lenient decompression of data missing both
    values.

    http://bugs.python.org/issue5784
    """
    try:
        return zlib.decompress(content)
    except zlib.error:
        # Fall back to raw DEFLATE (negative wbits disables the zlib header).
        return zlib.decompress(content, -15)
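
# Example (illustrative): a body compressed as raw DEFLATE (no zlib header),
# as some servers send it, still round-trips:
#
#     co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
#     raw = co.compress(b"spam") + co.flush()
#     assert decode_deflate(raw) == b"spam"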


def encode_deflate(content):
    """
    Returns compressed content, always including zlib header and checksum.
    """
    return zlib.compress(content)


# Dispatch tables mapping Content-Encoding values to the codec functions above.
custom_decode = {
    "none": identity,
    "identity": identity,
    "gzip": decode_gzip,
    "deflate": decode_deflate,
    "br": decode_brotli,
}
custom_encode = {
    "none": identity,
    "identity": identity,
    "gzip": encode_gzip,
    "deflate": encode_deflate,
    "br": encode_brotli,
}

__all__ = ["encode", "decode"]
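

if __name__ == "__main__":  # pragma: no cover
    # Illustrative smoke test, not part of the public interface: round-trip a
    # body through each supported codec name.
    for _enc in ("none", "identity", "gzip", "deflate", "br"):
        assert decode(encode(b"hello world", _enc), _enc) == b"hello world"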