mitmproxy/libmproxy/utils.py

from __future__ import (absolute_import, print_function, division)
import os
import datetime
import re
import time
import json


def timestamp():
    """
        Returns the current time as a float number of seconds since the
        epoch, as reported by time.time(). A plain float is trivially
        serializable.
    """
    now = time.time()
    return now


def format_timestamp(s):
    """
        Formats a timestamp (seconds since the epoch) as a local-time
        string of the form "YYYY-MM-DD HH:MM:SS".

        The value is round-tripped through time.localtime/time.mktime
        before formatting, which discards any fractional seconds.
    """
    local_struct = time.localtime(s)
    whole_seconds = time.mktime(local_struct)
    moment = datetime.datetime.fromtimestamp(whole_seconds)
    return moment.strftime("%Y-%m-%d %H:%M:%S")


def format_timestamp_with_milli(s):
    """
        Formats a timestamp (seconds since the epoch) as a local-time
        string of the form "YYYY-MM-DD HH:MM:SS.mmm".
    """
    stamp = datetime.datetime.fromtimestamp(s).strftime("%Y-%m-%d %H:%M:%S.%f")
    # %f renders six microsecond digits; drop the last three so only
    # milliseconds remain (truncated, not rounded).
    return stamp[:len(stamp) - 3]


def isBin(s):
    """
        Does this string contain any character that is not printable
        ASCII text?

        A character counts as binary when its code point is below 9,
        between 14 and 31 inclusive, or above 126. Tab, newline, vertical
        tab, form feed and carriage return (9-13) are treated as text.

        s: A text string or a byte string. Iterating a byte string on
        Python 3 yields integers rather than one-character strings, so
        both element kinds are accepted.

        Returns True as soon as one such character is found, False
        otherwise (including for empty input).
    """
    for ch in s:
        # bytes on Python 3 iterate as ints; ord() would raise on those.
        code = ch if isinstance(ch, int) else ord(ch)
        if code < 9 or 13 < code < 32 or 126 < code:
            return True
    return False


def isMostlyBin(s):
    """
        Heuristic: does the start of this string look like binary data?

        Only the first 100 characters are sampled. The result is True
        when more than 30% of the sampled characters are classified as
        binary by isBin.

        Empty (or None) input is reported as not binary; without this
        guard the ratio below would divide by zero.
    """
    if not s:
        return False
    sample = s[:100]
    # Relies on true division (the module imports division from
    # __future__), so the ratio is a float on Python 2 as well.
    return sum(isBin(ch) for ch in sample) / len(sample) > 0.3


def isXML(s):
    """
        Cheap heuristic: does this string look like XML/HTML markup?

        Skips leading newlines, spaces and tabs and reports whether the
        first remaining character is "<". Only the leading characters are
        inspected, so the cost does not grow with the size of the body.

        Returns True or False. Empty or whitespace-only input returns
        False (previously the function fell off the end and returned
        None in that case).
    """
    for ch in s:
        if ch in "\n \t":
            continue
        return ch == "<"
    # Nothing but skippable whitespace (or nothing at all) was seen.
    return False


def pretty_json(s):
    """
        Re-serializes a JSON document with sorted keys and a four-space
        indent.

        Returns the formatted string, or None when json.loads rejects
        the input with a ValueError.
    """
    try:
        parsed = json.loads(s)
    except ValueError:
        return None
    else:
        return json.dumps(parsed, indent=4, sort_keys=True)


def pretty_duration(secs):
    """
        Renders a duration given in seconds as a short string.

        The precision shrinks as the value grows:
            >= 100 s: whole seconds, e.g. "151s"
            >= 10 s:  one decimal, e.g. "12.3s"
            >= 1 s:   two decimals, e.g. "1.00s"
            otherwise whole milliseconds, e.g. "500ms"
    """
    if secs >= 100:
        return "{:.0f}s".format(secs)
    if secs >= 10:
        return "{:2.1f}s".format(secs)
    if secs >= 1:
        return "{:1.2f}s".format(secs)
    # Sub-second durations are reported in milliseconds.
    return "{:.0f}ms".format(secs * 1000)


class Data:

    """
        Locates data files relative to the directory of an imported
        module.
    """

    def __init__(self, name):
        """
            name: Name passed to __import__. The directory containing
            the returned module's file becomes the base directory.
            Note that for a dotted name, __import__ returns the
            top-level package.
        """
        module = __import__(name)
        self.dirname = os.path.abspath(os.path.dirname(module.__file__))

    def path(self, path):
        """
            Returns the full path of 'path' joined onto the base
            directory. 'path' may name a file or a directory.

            Raises ValueError if the resulting path does not exist.
        """
        candidate = os.path.join(self.dirname, path)
        if os.path.exists(candidate):
            return candidate
        raise ValueError("dataPath: %s does not exist." % candidate)
# Module-level Data instance built from this module's own name; its path()
# method resolves names under the directory recorded in pkg_data.dirname.
pkg_data = Data(__name__)


class LRUCache:

    """
        A small least-recently-used cache for computed values, keyed on
        the argument tuple used to compute them.
    """

    def __init__(self, size=100):
        # Maximum number of entries kept before the oldest is evicted.
        self.size = size
        # Maps argument tuple -> generated value.
        self.cache = {}
        # Argument tuples ordered from most to least recently used.
        self.cacheList = []

    def get(self, gen, *args):
        """
            gen: A (presumably expensive) function producing the value.
            Its identity is NOT part of the cache key.
            *args: Hashable arguments. They identify the cache entry and
            are passed to gen when the value has to be generated.

            Returns the cached value for args, generating and storing it
            first if needed.
        """
        if args not in self.cache:
            value = gen(*args)
            self.cacheList.insert(0, args)
            self.cache[args] = value
            if len(self.cacheList) > self.size:
                # Over capacity: drop the least recently used entry.
                evicted = self.cacheList.pop()
                self.cache.pop(evicted)
            return value

        # Cache hit: move the key to the front of the recency list.
        self.cacheList.remove(args)
        self.cacheList.insert(0, args)
        return self.cache[args]


def clean_hanging_newline(t):
    """
        Strips a single trailing newline from t, if there is one.

        Many editors silently append a newline to the last line of a
        document. Removing it fixes that common annoyance, at the cost
        of also removing a trailing newline in the rare case where the
        user put it there on purpose.
    """
    if not t:
        return t
    if t[-1] != "\n":
        return t
    return t[:-1]


def parse_size(s):
    """
        Parses a size specification. Valid specifications are:

            123: bytes
            123k: kilobytes
            123m: megabytes
            123g: gigabytes

        The unit suffix is case-insensitive and units are powers of 1024.

        Returns the size in bytes as an int, or None if s is empty or
        None.

        Raises ValueError if the numeric part is not a valid integer. The
        message quotes the specification exactly as it was passed in.
    """
    if not s:
        return None
    multipliers = {
        "k": 1024 ** 1,
        "m": 1024 ** 2,
        "g": 1024 ** 3,
    }
    suffix = s[-1].lower()
    if suffix in multipliers:
        digits = s[:-1]
        mult = multipliers[suffix]
    else:
        digits = s
        mult = 1
    try:
        return int(digits) * mult
    except ValueError:
        # Report the caller's original input, not the suffix-stripped
        # remainder, so messages for inputs like "k" are not empty.
        raise ValueError("Invalid size specification: %s" % s)


def safe_subn(pattern, repl, target, *args, **kwargs):
    """
        Wrapper around re.subn that works around Unicode conversion
        problems by casting the pattern and the replacement to str
        before substituting. Any extra positional and keyword arguments
        are passed through to re.subn unchanged.

        This is a stopgap; a proper solution would be aware of the
        actual content encoding.
    """
    pattern_str = str(pattern)
    repl_str = str(repl)
    return re.subn(pattern_str, repl_str, target, *args, **kwargs)
improve display of non-ascii contents fixes #283 2015-09-12 15:10:38 +00:00			`from __future__ import (absolute_import, print_function, division)`
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00			`import os`
			`import datetime`
			`import re`
			`import time`
JSON pretty-printing. Also rename the display modes ("pretty" instead of "indent"), and expand the built-in documentation. 2011-06-30 01:27:27 +00:00			`import json`
Store timestamps on flow components as a UTC time tuple. Format is: (tm_year,tm_mon,tm_mday,tm_hour,tm_min, tm_sec,tm_wday,tm_yday,tm_isdst) 2011-03-07 00:46:02 +00:00
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00
Store timestamps on flow components as a UTC time tuple. Format is: (tm_year,tm_mon,tm_mday,tm_hour,tm_min, tm_sec,tm_wday,tm_yday,tm_isdst) 2011-03-07 00:46:02 +00:00			`def timestamp():`
Think harder about timestamps. Just save seconds since the epoch as a float. 2011-03-08 23:18:08 +00:00			`"""`
			`Returns a serializable UTC timestamp.`
			`"""`
			`return time.time()`
Store timestamps on flow components as a UTC time tuple. Format is: (tm_year,tm_mon,tm_mday,tm_hour,tm_min, tm_sec,tm_wday,tm_yday,tm_isdst) 2011-03-07 00:46:02 +00:00
Initial checkin. 2010-02-16 04:09:07 +00:00
removed unused parameter 2015-03-03 20:49:47 +00:00			`def format_timestamp(s):`
fixed formatting and added a 'test' (sort of) 2015-03-03 02:22:44 +00:00			`s = time.localtime(s)`
			`d = datetime.datetime.fromtimestamp(time.mktime(s))`
			`return d.strftime("%Y-%m-%d %H:%M:%S")`
Add timestamps to flows. For now, these are only displayed on the connection view screen, with second granularity. 2011-02-03 00:30:47 +00:00
Minor refactor to PR #496 2015-03-04 17:02:01 +00:00
fixed formatting and added a 'test' (sort of) 2015-03-03 02:22:44 +00:00			`def format_timestamp_with_milli(s):`
			`d = datetime.datetime.fromtimestamp(s)`
Minor refactor to PR #496 2015-03-04 17:02:01 +00:00			`return d.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]`


Initial checkin. 2010-02-16 04:09:07 +00:00			`def isBin(s):`
			`"""`
			`Does this string have any non-ASCII characters?`
			`"""`
			`for i in s:`
			`i = ord(i)`
improve display of non-ascii contents fixes #283 2015-09-12 15:10:38 +00:00			`if i < 9 or 13 < i < 32 or 126 < i:`
Initial checkin. 2010-02-16 04:09:07 +00:00			`return True`
			`return False`


improve display of non-ascii contents fixes #283 2015-09-12 15:10:38 +00:00			`def isMostlyBin(s):`
			`s = s[:100]`
code formatting: fix whitespace issues 2016-01-27 09:12:18 +00:00			`return sum(isBin(ch) for ch in s) / len(s) > 0.3`
improve display of non-ascii contents fixes #283 2015-09-12 15:10:38 +00:00

Try not to hang when user views large request & response bodies Two different strategies here: - Use a simple heuristic to detect if we're looking at XML data when indent mode is used. On non-XML data we can hang even on small documents. - Only view partial data for large bodies. At the moment the cutoff is 100k. I might finetune this later. 2011-06-27 03:59:17 +00:00			`def isXML(s):`
			`for i in s:`
			`if i in "\n \t":`
			`continue`
			`elif i == "<":`
			`return True`
			`else:`
			`return False`


JSON pretty-printing. Also rename the display modes ("pretty" instead of "indent"), and expand the built-in documentation. 2011-06-30 01:27:27 +00:00			`def pretty_json(s):`
			`try:`
			`p = json.loads(s)`
			`except ValueError:`
			`return None`
fix bugs 2015-09-11 11:37:52 +00:00			`return json.dumps(p, sort_keys=True, indent=4)`
JSON pretty-printing. Also rename the display modes ("pretty" instead of "indent"), and expand the built-in documentation. 2011-06-30 01:27:27 +00:00

added to flowlist / flowdetail time elapsed between request sent and response received 2015-02-26 21:14:20 +00:00			`def pretty_duration(secs):`
			`formatters = [`
Minor refactor to PR #496 2015-03-04 17:02:01 +00:00			`(100, "{:.0f}s"),`
			`(10, "{:2.1f}s"),`
			`(1, "{:1.2f}s"),`
added to flowlist / flowdetail time elapsed between request sent and response received 2015-02-26 21:14:20 +00:00			`]`

			`for limit, formatter in formatters:`
			`if secs >= limit:`
			`return formatter.format(secs)`
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00			`# less than 1 sec`
			`return "{:.0f}ms".format(secs * 1000)`
Initial checkin. 2010-02-16 04:09:07 +00:00
pretty_size now lives in netlib.utils 2015-04-30 00:18:01 +00:00
Initial checkin. 2010-02-16 04:09:07 +00:00			`class Data:`
code formatting: fix whitespace issues 2016-01-27 09:12:18 +00:00
Initial checkin. 2010-02-16 04:09:07 +00:00			`def __init__(self, name):`
			`m = __import__(name)`
			`dirname, _ = os.path.split(m.__file__)`
			`self.dirname = os.path.abspath(dirname)`

			`def path(self, path):`
			`"""`
			`Returns a path to the package data housed at 'path' under this`
			`module.Path can be a path to a file, or to a directory.`

			`This function will raise ValueError if the path does not exist.`
			`"""`
			`fullpath = os.path.join(self.dirname, path)`
			`if not os.path.exists(fullpath):`
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00			`raise ValueError("dataPath: %s does not exist." % fullpath)`
Initial checkin. 2010-02-16 04:09:07 +00:00			`return fullpath`
General cleanup. Cut out unused variables and code, generally shut up pychecker as much as is reasonable. 2011-08-02 04:14:33 +00:00			`pkg_data = Data(__name__)`
Initial checkin. 2010-02-16 04:09:07 +00:00

Improve responsiveness of request and response viewing. - Computing the view of a large body is expensive, so we introduce an LRU cache to hold the latest 20 results. - Use ListView more correctly, passing it individual urwid.Text snippets, rather than a single large one. This hugely improves render time. 2011-03-15 00:05:33 +00:00			`class LRUCache:`
code formatting: fix whitespace issues 2016-01-27 09:12:18 +00:00
Improve responsiveness of request and response viewing. - Computing the view of a large body is expensive, so we introduce an LRU cache to hold the latest 20 results. - Use ListView more correctly, passing it individual urwid.Text snippets, rather than a single large one. This hugely improves render time. 2011-03-15 00:05:33 +00:00			`"""`
Replace far-too-clever decorator LRU cache with something simpler 2015-03-22 08:00:41 +00:00			`A simple LRU cache for generated values.`
Improve responsiveness of request and response viewing. - Computing the view of a large body is expensive, so we introduce an LRU cache to hold the latest 20 results. - Use ListView more correctly, passing it individual urwid.Text snippets, rather than a single large one. This hugely improves render time. 2011-03-15 00:05:33 +00:00			`"""`
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00
Improve responsiveness of request and response viewing. - Computing the view of a large body is expensive, so we introduce an LRU cache to hold the latest 20 results. - Use ListView more correctly, passing it individual urwid.Text snippets, rather than a single large one. This hugely improves render time. 2011-03-15 00:05:33 +00:00			`def __init__(self, size=100):`
			`self.size = size`
Replace far-too-clever decorator LRU cache with something simpler 2015-03-22 08:00:41 +00:00			`self.cache = {}`
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00			`self.cacheList = []`
Replace far-too-clever decorator LRU cache with something simpler 2015-03-22 08:00:41 +00:00
			`def get(self, gen, *args):`
			`"""`
			`gen: A (presumably expensive) generator function. The identity of`
			`gen is NOT taken into account by the cache.`
			`*args: A list of immutable arguments, used to establish identiy by`
			`*the cache, and passed to gen to generate values.`
			`"""`
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00			`if args in self.cache:`
Replace far-too-clever decorator LRU cache with something simpler 2015-03-22 08:00:41 +00:00			`self.cacheList.remove(args)`
			`self.cacheList.insert(0, args)`
			`return self.cache[args]`
			`else:`
			`ret = gen(*args)`
			`self.cacheList.insert(0, args)`
			`self.cache[args] = ret`
			`if len(self.cacheList) > self.size:`
			`d = self.cacheList.pop()`
			`self.cache.pop(d)`
			`return ret`
Improve responsiveness of request and response viewing. - Computing the view of a large body is expensive, so we introduce an LRU cache to hold the latest 20 results. - Use ListView more correctly, passing it individual urwid.Text snippets, rather than a single large one. This hugely improves render time. 2011-03-15 00:05:33 +00:00
Move all HTTP objects to flow.py That's Request, Response, ClientConnect, ClientDisconnect, Error, and Headers. 2011-08-03 10:38:23 +00:00
KVEditor: "e" shortcut spawns an external editor on a field. 2012-02-08 05:25:00 +00:00			`def clean_hanging_newline(t):`
			`"""`
			`Many editors will silently add a newline to the final line of a`
			`document (I'm looking at you, Vim). This function fixes this common`
			`problem at the risk of removing a hanging newline in the rare cases`
			`where the user actually intends it.`
			`"""`
Add an "r" shortcut in grid editors to read value from file. 2012-08-25 00:21:45 +00:00			`if t and t[-1] == "\n":`
KVEditor: "e" shortcut spawns an external editor on a field. 2012-02-08 05:25:00 +00:00			`return t[:-1]`
			`return t`


Add HTTP body size limit specification to command-line tools. 2011-09-09 03:27:31 +00:00			`def parse_size(s):`
			`"""`
			`Parses a size specification. Valid specifications are:`
Fix an issue caused by some editors when editing a request/response body. Many editors make it hard save a file without a terminating newline on the last line. When editing message bodies, this can cause problems. For now, I just strip the newlines off the end of the body when we return from an editor. 2012-01-20 23:43:00 +00:00
Add HTTP body size limit specification to command-line tools. 2011-09-09 03:27:31 +00:00			`123: bytes`
			`123k: kilobytes`
			`123m: megabytes`
			`123g: gigabytes`
			`"""`
			`if not s:`
			`return None`
			`mult = None`
			`if s[-1].lower() == "k":`
			`mult = 1024**1`
			`elif s[-1].lower() == "m":`
			`mult = 1024**2`
			`elif s[-1].lower() == "g":`
			`mult = 1024**3`

			`if mult:`
			`s = s[:-1]`
			`else:`
			`mult = 1`
			`try:`
			`return int(s) * mult`
			`except ValueError:`
Add coding style check, reformat. 2015-05-30 00:03:28 +00:00			`raise ValueError("Invalid size specification: %s" % s)`
Fix a crashing bug when replacing text in a flow with unicode bodies. 2012-05-26 01:10:31 +00:00

			`def safe_subn(pattern, repl, target, *args, **kwargs):`
			`"""`
			`There are Unicode conversion problems with re.subn. We try to smooth`
			`that over by casting the pattern and replacement to strings. We really`
			`need a better solution that is aware of the actual content ecoding.`
			`"""`
			`return re.subn(str(pattern), str(repl), target, *args, **kwargs)`