2015-05-02 04:17:00 +00:00
|
|
|
import string
|
|
|
|
import random
|
|
|
|
import mmap
|
|
|
|
|
|
|
|
# Named byte alphabets. Each key is a datatype name and each value is the
# bytes object holding every byte that datatype may produce.
DATATYPES = {
    "ascii_letters": string.ascii_letters.encode(),
    "ascii_lowercase": string.ascii_lowercase.encode(),
    "ascii_uppercase": string.ascii_uppercase.encode(),
    "digits": string.digits.encode(),
    "hexdigits": string.hexdigits.encode(),
    "octdigits": string.octdigits.encode(),
    "punctuation": string.punctuation.encode(),
    "whitespace": string.whitespace.encode(),
    "ascii": string.printable.encode(),
    # Every possible byte value, 0x00 through 0xff.
    "bytes": bytes(bytearray(range(256))),
}
|
|
|
|
|
|
|
|
|
2015-06-18 09:07:33 +00:00
|
|
|
class TransformGenerator(object):
    """
    Perform a byte-by-byte transform of another generator - that is, for
    each input byte, the transformation must produce one output byte.

    gen: A generator to wrap. It must support len() and indexing with
        both integers and slice objects.
    transform: A function (offset, data) -> transformed. offset is the
        integer position of the first byte of data within gen.
    """

    def __init__(self, gen, transform):
        self.gen = gen
        self.transform = transform

    def __len__(self):
        return len(self.gen)

    def __getitem__(self, x):
        """
        Return the transformed item or slice of the wrapped generator.

        x: An integer index or a slice object.
        """
        d = self.gen[x]
        if isinstance(x, slice):
            # On Python 3 every slicing operation arrives here as a slice
            # object. transform() wants the integer offset of the first
            # byte, so resolve omitted/negative starts against the length.
            offset = x.indices(len(self.gen))[0]
            return self.transform(offset, d)
        return self.transform(x, d)

    def __getslice__(self, a, b):
        # Only invoked by Python 2 for obj[a:b]. Route through __getitem__
        # so wrapped generators need not implement __getslice__ themselves.
        return self[slice(a, b)]

    def __repr__(self):
        # Wrap in a tuple so a tuple-valued gen is formatted, not unpacked.
        return "'transform(%s)'" % (self.gen,)
|
2015-05-02 04:17:00 +00:00
|
|
|
|
|
|
|
|
2016-06-02 07:29:14 +00:00
|
|
|
def rand_byte(chars):
    """
    Pick one random element of a charset and return it as a bytes object
    of length one.
    """
    picked = random.choice(chars)
    # Going through bytearray keeps the result identical on Python 2 and
    # Python 3; calling bytes() on the element directly would not.
    single = bytearray((picked,))
    return bytes(single)
|
|
|
|
|
|
|
|
|
2015-06-18 09:07:33 +00:00
|
|
|
class RandomGenerator(object):
    """
    Sequence-like source of random bytes.

    dtype: Key into DATATYPES selecting the charset to draw from.
    length: The value reported by len(), also used to resolve slices.
    """

    def __init__(self, dtype, length):
        self.length = length
        self.dtype = dtype

    def __len__(self):
        return self.length

    def __getitem__(self, x):
        """
        Return fresh random bytes: a single byte for an integer index, or
        one byte per position covered by a slice.
        """
        charset = DATATYPES[self.dtype]
        if not isinstance(x, slice):
            return rand_byte(charset)
        start, stop, step = x.indices(self.length)
        return b"".join([rand_byte(charset) for _ in range(start, stop, step)])

    def __repr__(self):
        return "%s random from %s" % (self.length, self.dtype)
|
2015-05-02 04:17:00 +00:00
|
|
|
|
|
|
|
|
2015-06-18 09:07:33 +00:00
|
|
|
class FileGenerator(object):
    """
    Sequence-like, read-only view of a file's bytes backed by a memory map.

    path: Path of the file to expose. It is opened in binary mode and
        mapped in full when the generator is constructed.

    The generator holds an open file and a memory map. Call close(), or
    use the instance as a context manager, to release them.
    """

    def __init__(self, path):
        self.path = path
        self.fp = open(path, "rb")
        try:
            self.map = mmap.mmap(self.fp.fileno(), 0, access=mmap.ACCESS_READ)
        except Exception:
            # Do not leave the file handle open if mapping fails.
            self.fp.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Release the memory map and the underlying file handle.
        """
        self.map.close()
        self.fp.close()

    def __len__(self):
        return len(self.map)

    def __getitem__(self, x):
        """
        Return the bytes at an integer index (as a bytes object) or slice.
        """
        if isinstance(x, slice):
            return self.map[x]
        # A slice of length 1 returns a byte object (not an integer).
        # For x == -1 the end bound x + 1 is 0, which would give an empty
        # slice, so fall back to the map size to reach the last byte.
        return self.map[slice(x, x + 1 or self.map.size())]

    def __repr__(self):
        return "<%s" % self.path
|