From 73ce169e3d11eeabeb78143bd86edfdbc3e07fd9 Mon Sep 17 00:00:00 2001
From: Aldo Cortesi
Date: Sun, 12 Apr 2015 10:26:09 +1200
Subject: [PATCH] Initial outline of a cookie parsing and serialization module.

---
 .env                      |   5 +
 netlib/http_cookies.py    | 163 ++++++++++++++++++++++++++++++++++++++
 test/test_http_cookies.py | 129 ++++++++++++++++++++++++++++++
 3 files changed, 297 insertions(+)
 create mode 100644 .env
 create mode 100644 netlib/http_cookies.py
 create mode 100644 test/test_http_cookies.py

diff --git a/.env b/.env
new file mode 100644
index 000000000..7f847e29f
--- /dev/null
+++ b/.env
@@ -0,0 +1,5 @@
+DIR=`dirname $0`
+if [ -z "$VIRTUAL_ENV" ] && [ -f $DIR/../venv.mitmproxy/bin/activate ]; then
+    echo "Activating mitmproxy virtualenv..."
+    source $DIR/../venv.mitmproxy/bin/activate
+fi
diff --git a/netlib/http_cookies.py b/netlib/http_cookies.py
new file mode 100644
index 000000000..e11e0f904
--- /dev/null
+++ b/netlib/http_cookies.py
@@ -0,0 +1,163 @@
+"""
+A flexible module for cookie parsing and manipulation.
+
+We try to be as permissive as possible. Parsing accepts formats from RFC6265
+and RFC2109. Serialization follows RFC6265 strictly.
+
+    http://tools.ietf.org/html/rfc6265
+    http://tools.ietf.org/html/rfc2109
+"""
+
+import re
+
+import odict
+
+
+def _read_until(s, start, term):
+    """
+    Read until one of the characters in term is reached.
+
+    Returns a (text, offset) tuple. If a terminator is found, offset is its
+    index. If not, the rest of the string is returned and offset is len(s).
+    Reading at or past the end of the string returns ("", start+1).
+    """
+    if start >= len(s):
+        return "", start+1
+    for i in range(start, len(s)):
+        if s[i] in term:
+            return s[start:i], i
+    return s[start:], len(s)
+
+
+def _read_token(s, start):
+    """
+    Read a token - the LHS of a token/value pair in a cookie.
+    """
+    return _read_until(s, start, ";=")
+
+
+def _read_quoted_string(s, start):
+    """
+    start: offset to the first quote of the string to be read
+
+    A sort of loose super-set of the various quoted string specifications.
+
+    RFC6265 disallows backslashes or double quotes within quoted strings.
+    Prior RFCs use backslashes to escape. This leaves us free to apply
+    backslash escaping by default and be compatible with everything.
+
+    Returns a (text, offset) tuple, where offset is the index just past the
+    closing quote, or len(s) if the string is unterminated.
+    """
+    escaping = False
+    ret = []
+    # Start at the opening quote, so that a lone trailing quote still yields
+    # a valid offset when the loop body never runs.
+    i = start
+    for i in range(start+1, len(s)):
+        if escaping:
+            ret.append(s[i])
+            escaping = False
+        elif s[i] == '"':
+            break
+        elif s[i] == "\\":
+            escaping = True
+        else:
+            ret.append(s[i])
+    return "".join(ret), i+1
+
+
+def _read_value(s, start):
+    """
+    Reads a value - the RHS of a token/value pair in a cookie.
+
+    An "=" at the very end of the string yields an empty value.
+    """
+    if start >= len(s):
+        return "", start
+    elif s[start] == '"':
+        return _read_quoted_string(s, start)
+    else:
+        return _read_until(s, start, ";,")
+
+
+def _read_pairs(s):
+    """
+    Read pairs of lhs=rhs values.
+
+    Returns a (pairs, offset) tuple, where pairs is a list of [lhs, rhs]
+    lists. rhs is None if the pair has no "=". Empty segments (e.g. after a
+    trailing ";") are skipped.
+    """
+    off = 0
+    vals = []
+    while 1:
+        lhs, off = _read_token(s, off)
+        lhs = lhs.lstrip()
+        rhs = None
+        if off < len(s) and s[off] == "=":
+            rhs, off = _read_value(s, off+1)
+        if lhs or rhs is not None:
+            vals.append([lhs, rhs])
+        off += 1
+        if not off < len(s):
+            break
+    return vals, off
+
+
+# Characters that must be backslash-escaped inside a quoted string.
+ESCAPE = re.compile(r"([\"\\])")
+# Matches values that cannot be emitted bare: anything containing a quote,
+# comma, semicolon, backslash, whitespace, control or non-ASCII character.
+SPECIAL = re.compile(r"[^\x21-\x7e]|[\",;\\]")
+
+
+def _format_pairs(lst):
+    """
+    Format a list of [lhs, rhs] pairs, as returned by _read_pairs.
+
+    Pairs with an rhs of None are emitted as a bare lhs. Values containing
+    special characters are quoted and backslash-escaped, so that the output
+    parses back to the same pairs.
+    """
+    vals = []
+    for k, v in lst:
+        if v is None:
+            vals.append(k)
+        else:
+            if SPECIAL.search(v):
+                v = '"%s"' % ESCAPE.sub(r"\\\1", v)
+            vals.append("%s=%s" % (k, v))
+    return "; ".join(vals)
+
+
+def parse_cookies(s):
+    """
+    Parses a Cookie header value.
+    Returns an ODict object.
+    """
+    pairs, off = _read_pairs(s)
+    return odict.ODict(pairs)
+
+
+def unparse_cookies(od):
+    """
+    Formats a Cookie header value.
+
+    od: an ODict object, as returned by parse_cookies.
+    """
+    return _format_pairs(od.lst)
+
+
+def parse_set_cookies(s):
+    """
+    Not implemented yet.
+    """
+    pass
+
+
+def unparse_set_cookies(s):
+    """
+    Not implemented yet.
+    """
+    pass
diff --git a/test/test_http_cookies.py b/test/test_http_cookies.py
new file mode 100644
index 000000000..b3f1f9146
--- /dev/null
+++ b/test/test_http_cookies.py
@@ -0,0 +1,129 @@
+from netlib import http_cookies, odict
+import nose.tools
+
+
+def test_read_token():
+    tokens = [
+        [("foo", 0), ("foo", 3)],
+        [("foo", 1), ("oo", 3)],
+        [(" foo", 1), ("foo", 4)],
+        [(" foo;", 1), ("foo", 4)],
+        [(" foo=", 1), ("foo", 4)],
+        [(" foo=bar", 1), ("foo", 4)],
+    ]
+    for q, a in tokens:
+        nose.tools.eq_(http_cookies._read_token(*q), a)
+
+
+def test_read_quoted_string():
+    tokens = [
+        [('"foo" x', 0), ("foo", 5)],
+        [(r'"f\oo" x', 0), ("foo", 6)],
+        [(r'"f\\o" x', 0), (r"f\o", 6)],
+        [(r'"f\\" x', 0), (r"f" + '\\', 5)],
+        [('"fo\\\"" x', 0), ("fo\"", 6)],
+        [('"foo', 0), ("foo", 4)],
+        [('"', 0), ("", 1)],
+    ]
+    for q, a in tokens:
+        nose.tools.eq_(http_cookies._read_quoted_string(*q), a)
+
+
+def test_read_pairs():
+    vals = [
+        [
+            "one",
+            [["one", None]]
+        ],
+        [
+            "one=two",
+            [["one", "two"]]
+        ],
+        [
+            'one="two"',
+            [["one", "two"]]
+        ],
+        [
+            'one="two"; three=four',
+            [["one", "two"], ["three", "four"]]
+        ],
+        [
+            'one="two"; three=four; five',
+            [["one", "two"], ["three", "four"], ["five", None]]
+        ],
+        [
+            'one="\\"two"; three=four',
+            [["one", '"two'], ["three", "four"]]
+        ],
+        [
+            "one=",
+            [["one", ""]]
+        ],
+        [
+            "one=two; ",
+            [["one", "two"]]
+        ],
+        [
+            "",
+            []
+        ],
+    ]
+    for s, lst in vals:
+        ret, off = http_cookies._read_pairs(s)
+        nose.tools.eq_(ret, lst)
+
+
+def test_pairs_roundtrips():
+    pairs = [
+        [
+            "one=uno",
+            [["one", "uno"]]
+        ],
+        [
+            "one",
+            [["one", None]]
+        ],
+        [
+            "one=uno; two=due",
+            [["one", "uno"], ["two", "due"]]
+        ],
+        [
+            r'one="uno"; two="\due"',
+            [["one", "uno"], ["two", "due"]]
+        ],
+        [
+            'one="un\\"o"',
+            [["one", 'un"o']]
+        ],
+        [
+            'one="u;no"; two="d ue"',
+            [["one", "u;no"], ["two", "d ue"]]
+        ],
+        [
+            "one=uno; two; three=tre",
+            [["one", "uno"], ["two", None], ["three", "tre"]]
+        ],
+        [
+            "_lvs2=zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g=; "
+            "_rcc2=53VdltWl+Ov6ordflA==;",
+            [
+                ["_lvs2", "zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g="],
+                ["_rcc2", "53VdltWl+Ov6ordflA=="]
+            ]
+        ]
+    ]
+    for s, lst in pairs:
+        ret, off = http_cookies._read_pairs(s)
+        nose.tools.eq_(ret, lst)
+        s2 = http_cookies._format_pairs(lst)
+        ret, off = http_cookies._read_pairs(s2)
+        nose.tools.eq_(ret, lst)
+
+
+def test_cookies_roundtrip():
+    od = http_cookies.parse_cookies("one=uno; two")
+    nose.tools.eq_(http_cookies.unparse_cookies(od), "one=uno; two")
+
+
+def test_parse_set_cookie():
+    pass