Py3: tnetstring

Copied the initial code from
[tnetstring3](https://raw.githubusercontent.com/carlopires/tnetstring3/master/tnetstring/__init__.py)

Then made some changes, such as using the isdigit() method and removing
superfluous ord() calls, to make all our tests pass.
This commit is contained in:
Shadab Zafar 2016-06-21 23:37:28 +05:30
parent 4576dbf8aa
commit b5d280cab1

View File

@ -79,9 +79,8 @@ __version__ = "%d.%d.%d%s" % (
__ver_major__, __ver_minor__, __ver_patch__, __ver_sub__) __ver_major__, __ver_minor__, __ver_patch__, __ver_sub__)
def dumps(value, encoding=None): def dumps(value):
"""dumps(object,encoding=None) -> string """
This function dumps a python object as a tnetstring. This function dumps a python object as a tnetstring.
""" """
# This uses a deque to collect output fragments in reverse order, # This uses a deque to collect output fragments in reverse order,
@ -91,22 +90,21 @@ def dumps(value, encoding=None):
# consider the _gdumps() function instead; it's a standard top-down # consider the _gdumps() function instead; it's a standard top-down
# generator that's simpler to understand but much less efficient. # generator that's simpler to understand but much less efficient.
q = deque() q = deque()
_rdumpq(q, 0, value, encoding) _rdumpq(q, 0, value)
return "".join(q) return b''.join(q)
def dump(value, file, encoding=None): def dump(value, file_handle):
"""dump(object,file,encoding=None)
This function dumps a python object as a tnetstring and writes it to
the given file.
""" """
file.write(dumps(value, encoding)) This function dumps a python object as a tnetstring and
file.flush() writes it to the given file.
"""
file_handle.write(dumps(value))
def _rdumpq(q, size, value, encoding=None): def _rdumpq(q, size, value):
"""Dump value as a tnetstring, to a deque instance, last chunks first. """
Dump value as a tnetstring, to a deque instance, last chunks first.
This function generates the tnetstring representation of the given value, This function generates the tnetstring representation of the given value,
pushing chunks of the output onto the given deque instance. It pushes pushing chunks of the output onto the given deque instance. It pushes
@ -122,79 +120,70 @@ def _rdumpq(q, size, value, encoding=None):
""" """
write = q.appendleft write = q.appendleft
if value is None: if value is None:
write("0:~") write(b'0:~')
return size + 3 return size + 3
if value is True: elif value is True:
write("4:true!") write(b'4:true!')
return size + 7 return size + 7
if value is False: elif value is False:
write("5:false!") write(b'5:false!')
return size + 8 return size + 8
if isinstance(value, six.integer_types): elif isinstance(value, six.integer_types):
data = str(value) data = str(value).encode()
ldata = len(data) ldata = len(data)
span = str(ldata) span = str(ldata).encode()
write("#") write(b'#')
write(data) write(data)
write(":") write(b':')
write(span) write(span)
return size + 2 + len(span) + ldata return size + 2 + len(span) + ldata
if isinstance(value, (float,)): elif isinstance(value, float):
# Use repr() for float rather than str(). # Use repr() for float rather than str().
# It round-trips more accurately. # It round-trips more accurately.
# Probably unnecessary in later python versions that # Probably unnecessary in later python versions that
# use David Gay's ftoa routines. # use David Gay's ftoa routines.
data = repr(value) data = repr(value).encode()
ldata = len(data) ldata = len(data)
span = str(ldata) span = str(ldata).encode()
write("^") write(b'^')
write(data) write(data)
write(":") write(b':')
write(span) write(span)
return size + 2 + len(span) + ldata return size + 2 + len(span) + ldata
if isinstance(value, str): elif isinstance(value, bytes):
lvalue = len(value) lvalue = len(value)
span = str(lvalue) span = str(lvalue).encode()
write(",") write(b',')
write(value) write(value)
write(":") write(b':')
write(span) write(span)
return size + 2 + len(span) + lvalue return size + 2 + len(span) + lvalue
if isinstance(value, (list, tuple,)): elif isinstance(value, (list, tuple)):
write("]") write(b']')
init_size = size = size + 1 init_size = size = size + 1
for item in reversed(value): for item in reversed(value):
size = _rdumpq(q, size, item, encoding) size = _rdumpq(q, size, item)
span = str(size - init_size) span = str(size - init_size).encode()
write(":") write(b':')
write(span) write(span)
return size + 1 + len(span) return size + 1 + len(span)
if isinstance(value, dict): elif isinstance(value, dict):
write("}") write(b'}')
init_size = size = size + 1 init_size = size = size + 1
for (k, v) in six.iteritems(value): for (k, v) in value.items():
size = _rdumpq(q, size, v, encoding) size = _rdumpq(q, size, v)
size = _rdumpq(q, size, k, encoding) size = _rdumpq(q, size, k)
span = str(size - init_size) span = str(size - init_size).encode()
write(":") write(b':')
write(span) write(span)
return size + 1 + len(span) return size + 1 + len(span)
if isinstance(value, unicode): else:
if encoding is None: raise ValueError("unserializable object: {} ({})".format(value, type(value)))
raise ValueError("must specify encoding to dump unicode strings")
value = value.encode(encoding)
lvalue = len(value)
span = str(lvalue)
write(",")
write(value)
write(":")
write(span)
return size + 2 + len(span) + lvalue
raise ValueError("unserializable object")
def _gdumps(value, encoding): def _gdumps(value):
"""Generate fragments of value dumped as a tnetstring. """
Generate fragments of value dumped as a tnetstring.
This is the naive dumping algorithm, implemented as a generator so that This is the naive dumping algorithm, implemented as a generator so that
it's easy to pass to "".join() without building a new list. it's easy to pass to "".join() without building a new list.
@ -203,72 +192,63 @@ def _gdumps(value, encoding):
measurably faster as it doesn't have to build intermediate strings. measurably faster as it doesn't have to build intermediate strings.
""" """
if value is None: if value is None:
yield "0:~" yield b'0:~'
elif value is True: elif value is True:
yield "4:true!" yield b'4:true!'
elif value is False: elif value is False:
yield "5:false!" yield b'5:false!'
elif isinstance(value, six.integer_types): elif isinstance(value, six.integer_types):
data = str(value) data = str(value).encode()
yield str(len(data)) yield str(len(data)).encode()
yield ":" yield b':'
yield data yield data
yield "#" yield b'#'
elif isinstance(value, (float,)): elif isinstance(value, float):
data = repr(value) data = repr(value).encode()
yield str(len(data)) yield str(len(data)).encode()
yield ":" yield b':'
yield data yield data
yield "^" yield b'^'
elif isinstance(value, (str,)): elif isinstance(value, bytes):
yield str(len(value)) yield str(len(value)).encode()
yield ":" yield b':'
yield value yield value
yield "," yield b','
elif isinstance(value, (list, tuple,)): elif isinstance(value, (list, tuple)):
sub = [] sub = []
for item in value: for item in value:
sub.extend(_gdumps(item)) sub.extend(_gdumps(item))
sub = "".join(sub) sub = b''.join(sub)
yield str(len(sub)) yield str(len(sub)).encode()
yield ":" yield b':'
yield sub yield sub
yield "]" yield b']'
elif isinstance(value, (dict,)): elif isinstance(value, (dict,)):
sub = [] sub = []
for (k, v) in six.iteritems(value): for (k, v) in value.items():
sub.extend(_gdumps(k)) sub.extend(_gdumps(k))
sub.extend(_gdumps(v)) sub.extend(_gdumps(v))
sub = "".join(sub) sub = b''.join(sub)
yield str(len(sub)) yield str(len(sub)).encode()
yield ":" yield b':'
yield sub yield sub
yield "}" yield b'}'
elif isinstance(value, (unicode,)):
if encoding is None:
raise ValueError("must specify encoding to dump unicode strings")
value = value.encode(encoding)
yield str(len(value))
yield ":"
yield value
yield ","
else: else:
raise ValueError("unserializable object") raise ValueError("unserializable object")
def loads(string, encoding=None): def loads(string):
"""loads(string,encoding=None) -> object """
This function parses a tnetstring into a python object. This function parses a tnetstring into a python object.
""" """
# No point duplicating effort here. In the C-extension version, # No point duplicating effort here. In the C-extension version,
# loads() is measurably faster than pop() since it can avoid # loads() is measurably faster than pop() since it can avoid
# the overhead of building a second string. # the overhead of building a second string.
return pop(string, encoding)[0] return pop(string)[0]
def load(file, encoding=None): def load(file_handle):
"""load(file,encoding=None) -> object """load(file) -> object
This function reads a tnetstring from a file and parses it into a This function reads a tnetstring from a file and parses it into a
python object. The file must support the read() method, and this python object. The file must support the read() method, and this
@ -276,70 +256,68 @@ def load(file, encoding=None):
""" """
# Read the length prefix one char at a time. # Read the length prefix one char at a time.
# Note that the netstring spec explicitly forbids padding zeros. # Note that the netstring spec explicitly forbids padding zeros.
c = file.read(1) c = file_handle.read(1)
if not c.isdigit(): if not c.isdigit():
raise ValueError("not a tnetstring: missing or invalid length prefix") raise ValueError("not a tnetstring: missing or invalid length prefix")
datalen = ord(c) - ord("0") datalen = ord(c) - ord('0')
c = file.read(1) c = file_handle.read(1)
if datalen != 0: if datalen != 0:
while c.isdigit(): while c.isdigit():
datalen = (10 * datalen) + (ord(c) - ord("0")) datalen = (10 * datalen) + (ord(c) - ord('0'))
if datalen > 999999999: if datalen > 999999999:
errmsg = "not a tnetstring: absurdly large length prefix" errmsg = "not a tnetstring: absurdly large length prefix"
raise ValueError(errmsg) raise ValueError(errmsg)
c = file.read(1) c = file_handle.read(1)
if c != ":": if c != b':':
raise ValueError("not a tnetstring: missing or invalid length prefix") raise ValueError("not a tnetstring: missing or invalid length prefix")
# Now we can read and parse the payload. # Now we can read and parse the payload.
# This repeats the dispatch logic of pop() so we can avoid # This repeats the dispatch logic of pop() so we can avoid
# re-constructing the outermost tnetstring. # re-constructing the outermost tnetstring.
data = file.read(datalen) data = file_handle.read(datalen)
if len(data) != datalen: if len(data) != datalen:
raise ValueError("not a tnetstring: length prefix too big") raise ValueError("not a tnetstring: length prefix too big")
type = file.read(1) tns_type = file_handle.read(1)
if type == ",": if tns_type == b',':
if encoding is not None:
return data.decode(encoding)
return data return data
if type == "#": if tns_type == b'#':
try: try:
return int(data) return int(data)
except ValueError: except ValueError:
raise ValueError("not a tnetstring: invalid integer literal") raise ValueError("not a tnetstring: invalid integer literal")
if type == "^": if tns_type == b'^':
try: try:
return float(data) return float(data)
except ValueError: except ValueError:
raise ValueError("not a tnetstring: invalid float literal") raise ValueError("not a tnetstring: invalid float literal")
if type == "!": if tns_type == b'!':
if data == "true": if data == b'true':
return True return True
elif data == "false": elif data == b'false':
return False return False
else: else:
raise ValueError("not a tnetstring: invalid boolean literal") raise ValueError("not a tnetstring: invalid boolean literal")
if type == "~": if tns_type == b'~':
if data: if data:
raise ValueError("not a tnetstring: invalid null literal") raise ValueError("not a tnetstring: invalid null literal")
return None return None
if type == "]": if tns_type == b']':
l = [] l = []
while data: while data:
(item, data) = pop(data, encoding) item, data = pop(data)
l.append(item) l.append(item)
return l return l
if type == "}": if tns_type == b'}':
d = {} d = {}
while data: while data:
(key, data) = pop(data, encoding) key, data = pop(data)
(val, data) = pop(data, encoding) val, data = pop(data)
d[key] = val d[key] = val
return d return d
raise ValueError("unknown type tag") raise ValueError("unknown type tag")
def pop(string, encoding=None): def pop(string):
"""pop(string,encoding=None) -> (object, remain) """pop(string,encoding='utf_8') -> (object, remain)
This function parses a tnetstring into a python object. This function parses a tnetstring into a python object.
It returns a tuple giving the parsed object and a string It returns a tuple giving the parsed object and a string
@ -347,53 +325,51 @@ def pop(string, encoding=None):
""" """
# Parse out data length, type and remaining string. # Parse out data length, type and remaining string.
try: try:
(dlen, rest) = string.split(":", 1) dlen, rest = string.split(b':', 1)
dlen = int(dlen) dlen = int(dlen)
except ValueError: except ValueError:
raise ValueError("not a tnetstring: missing or invalid length prefix") raise ValueError("not a tnetstring: missing or invalid length prefix: {}".format(string))
try: try:
(data, type, remain) = (rest[:dlen], rest[dlen], rest[dlen + 1:]) data, tns_type, remain = rest[:dlen], rest[dlen:dlen + 1], rest[dlen + 1:]
except IndexError: except IndexError:
# This fires if len(rest) < dlen, meaning we don't need # This fires if len(rest) < dlen, meaning we don't need
# to further validate that data is the right length. # to further validate that data is the right length.
raise ValueError("not a tnetstring: invalid length prefix") raise ValueError("not a tnetstring: invalid length prefix: {}".format(dlen))
# Parse the data based on the type tag. # Parse the data based on the type tag.
if type == ",": if tns_type == b',':
if encoding is not None: return data, remain
return (data.decode(encoding), remain) if tns_type == b'#':
return (data, remain)
if type == "#":
try: try:
return (int(data), remain) return int(data), remain
except ValueError: except ValueError:
raise ValueError("not a tnetstring: invalid integer literal") raise ValueError("not a tnetstring: invalid integer literal: {}".format(data))
if type == "^": if tns_type == b'^':
try: try:
return (float(data), remain) return float(data), remain
except ValueError: except ValueError:
raise ValueError("not a tnetstring: invalid float literal") raise ValueError("not a tnetstring: invalid float literal: {}".format(data))
if type == "!": if tns_type == b'!':
if data == "true": if data == b'true':
return (True, remain) return True, remain
elif data == "false": elif data == b'false':
return (False, remain) return False, remain
else: else:
raise ValueError("not a tnetstring: invalid boolean literal") raise ValueError("not a tnetstring: invalid boolean literal: {}".format(data))
if type == "~": if tns_type == b'~':
if data: if data:
raise ValueError("not a tnetstring: invalid null literal") raise ValueError("not a tnetstring: invalid null literal")
return (None, remain) return None, remain
if type == "]": if tns_type == b']':
l = [] l = []
while data: while data:
(item, data) = pop(data, encoding) item, data = pop(data)
l.append(item) l.append(item)
return (l, remain) return (l, remain)
if type == "}": if tns_type == b'}':
d = {} d = {}
while data: while data:
(key, data) = pop(data, encoding) key, data = pop(data)
(val, data) = pop(data, encoding) val, data = pop(data)
d[key] = val d[key] = val
return (d, remain) return d, remain
raise ValueError("unknown type tag") raise ValueError("unknown type tag: {}".format(tns_type))