mitmproxy/netlib/encoding.py

177 lines
4.3 KiB
Python
Raw Normal View History

2015-08-01 08:39:14 +00:00
"""
Utility functions for decoding response bodies.
2015-08-01 08:39:14 +00:00
"""
from __future__ import absolute_import
import codecs
import collections
2015-09-15 17:12:15 +00:00
from io import BytesIO
2016-09-03 13:01:41 +00:00
2015-08-01 08:39:14 +00:00
import gzip
import zlib
2016-07-30 12:43:53 +00:00
import brotli
2015-08-01 08:39:14 +00:00
2016-10-17 03:56:46 +00:00
from typing import Union
2015-08-01 08:39:14 +00:00
# A single-element cache shared by encode() and decode(): it remembers the
# most recent (encoded, encoding, errors, decoded) combination.
# This is quite useful in practice, e.g.
#     flow.request.content = flow.request.content.replace(b"foo", b"bar")
# does not require an .encode() call if content does not contain b"foo"
CachedDecode = collections.namedtuple(
    "CachedDecode",
    ["encoded", "encoding", "errors", "decoded"],
)

# Start out with an empty slot; every field is None until the first store.
_cache = CachedDecode(encoded=None, encoding=None, errors=None, decoded=None)
2016-10-17 03:56:46 +00:00
def decode(encoded: Union[str, bytes], encoding: str, errors: str='strict') -> Union[str, bytes]:
    """
    Decode the given input object

    Encodings listed in ``custom_decode`` are handled by their dedicated
    function; every other encoding name is handed to ``codecs.decode``
    together with ``errors``.

    Returns:
        The decoded value. Empty input is returned unchanged.

    Raises:
        ValueError, if decoding fails.
        TypeError is propagated unchanged.
    """
    global _cache

    if len(encoded) == 0:
        return encoded

    # Only bytes input can hit the single-element cache.
    cached = (
        isinstance(encoded, bytes) and
        _cache.encoded == encoded and
        _cache.encoding == encoding and
        _cache.errors == errors
    )
    if cached:
        return _cache.decoded

    try:
        # Keep the table lookup separate from the call: a KeyError raised
        # *inside* a decoder must not be mistaken for "no custom decoder".
        try:
            decoder = custom_decode[encoding]
        except KeyError:
            decoded = codecs.decode(encoded, encoding, errors)
        else:
            decoded = decoder(encoded)
        # Only results of the compression codings are remembered.
        if encoding in ("gzip", "deflate", "br"):
            _cache = CachedDecode(encoded, encoding, errors, decoded)
        return decoded
    except TypeError:
        raise
    except Exception as e:
        raise ValueError("{} when decoding {} with {}: {}".format(
            type(e).__name__,
            repr(encoded)[:10],
            repr(encoding),
            repr(e),
        )) from e
2016-10-17 03:56:46 +00:00
def encode(decoded: Union[str, bytes], encoding: str, errors: str='strict') -> Union[str, bytes]:
    """
    Encode the given input object

    Encodings listed in ``custom_encode`` are handled by their dedicated
    function; ``str`` input is converted to bytes with the default
    ``str.encode()`` first (``errors`` is not applied to that step).
    Every other encoding name is handed to ``codecs.encode`` together
    with ``errors``.

    Returns:
        The encoded value. Empty input is returned unchanged.

    Raises:
        ValueError, if encoding fails.
        TypeError is propagated unchanged.
    """
    global _cache

    if len(decoded) == 0:
        return decoded

    # Only bytes input can hit the single-element cache.
    cached = (
        isinstance(decoded, bytes) and
        _cache.decoded == decoded and
        _cache.encoding == encoding and
        _cache.errors == errors
    )
    if cached:
        return _cache.encoded

    try:
        # Look the encoder up *before* touching the payload, so that text
        # destined for a regular codec reaches codecs.encode() untouched
        # and `errors` is honoured there.
        try:
            encoder = custom_encode[encoding]
        except KeyError:
            value = decoded
            encoded = codecs.encode(decoded, encoding, errors)
        else:
            value = decoded
            if isinstance(value, str):
                value = decoded.encode()
            encoded = encoder(value)
        if encoding in ("gzip", "deflate", "br"):
            # Cache the bytes that were actually fed to the encoder, so a
            # later decode() served from the cache returns the same type as
            # an uncached decode() would.
            _cache = CachedDecode(encoded, encoding, errors, value)
        return encoded
    except TypeError:
        raise
    except Exception as e:
        raise ValueError("{} when encoding {} with {}: {}".format(
            type(e).__name__,
            repr(decoded)[:10],
            repr(encoding),
            repr(e),
        )) from e
2015-08-01 08:39:14 +00:00
def identity(content):
    """
    No-op codec: hands back exactly the object it was given.

    Identity is the default value of Accept-Encoding headers.
    """
    unchanged = content
    return unchanged
def decode_gzip(content):
    """
    Decompress gzip-formatted bytes and return the full payload.
    """
    # Wrap the bytes in an in-memory file so GzipFile can read from them.
    with gzip.GzipFile(fileobj=BytesIO(content)) as reader:
        return reader.read()
2015-08-01 08:39:14 +00:00
def encode_gzip(content):
    """
    Compress bytes into the gzip format and return the result.
    """
    buffer = BytesIO()
    # Leaving the with-block closes the GzipFile, which completes the
    # stream in the buffer; the buffer itself stays open.
    with gzip.GzipFile(fileobj=buffer, mode='wb') as writer:
        writer.write(content)
    return buffer.getvalue()
2016-07-30 12:43:53 +00:00
def decode_brotli(content):
    """
    Return the result of running ``brotli.decompress`` on content.
    """
    decompressed = brotli.decompress(content)
    return decompressed
def encode_brotli(content):
    """
    Return the result of running ``brotli.compress`` on content.
    """
    compressed = brotli.compress(content)
    return compressed
2015-08-01 08:39:14 +00:00
def decode_deflate(content):
    """
    Returns decompressed data for DEFLATE. Some servers may respond with
    compressed data without a zlib header or checksum. An undocumented
    feature of zlib permits the lenient decompression of data missing both
    values.

    http://bugs.python.org/issue5784

    Empty input is returned as empty bytes instead of raising, matching
    how decode() treats empty bodies.

    Raises:
        zlib.error, if the data is neither a valid zlib stream nor a valid
        raw DEFLATE stream.
    """
    # zlib rejects an empty buffer in both modes, so short-circuit here.
    if content == b"":
        return b""
    try:
        return zlib.decompress(content)
    except zlib.error:
        # Negative wbits: raw DEFLATE stream without header and checksum.
        return zlib.decompress(content, -15)
2015-08-01 08:39:14 +00:00
def encode_deflate(content):
    """
    Compress content with zlib; the output always carries the zlib header
    and checksum.
    """
    compressed = zlib.compress(content)
    return compressed
2015-09-17 14:31:50 +00:00
# One row per encoding that has dedicated handlers:
#     (name, decoder, encoder)
# Encoding names that are not listed here are passed on to the codecs
# module by decode() / encode().
_CODECS = (
    ("none", identity, identity),
    ("identity", identity, identity),
    ("gzip", decode_gzip, encode_gzip),
    ("deflate", decode_deflate, encode_deflate),
    ("br", decode_brotli, encode_brotli),
)

# Lookup tables used by decode() and encode() respectively.
custom_decode = {name: decoder for name, decoder, _ in _CODECS}
custom_encode = {name: encoder for name, _, encoder in _CODECS}

# Only the two generic entry points are part of the public API.
__all__ = ["encode", "decode"]