Initial outline of a cookie parsing and serialization module.

This commit is contained in:
Aldo Cortesi 2015-04-12 10:26:09 +12:00
parent d5eff70b6e
commit 73ce169e3d
3 changed files with 244 additions and 0 deletions

5
.env Normal file
View File

@ -0,0 +1,5 @@
# Activate the mitmproxy virtualenv found in the parent of this script's
# directory, unless a virtualenv is already active in the current shell.
# Expansions are quoted so that paths containing spaces keep working.
DIR=$(dirname "$0")
if [ -z "$VIRTUAL_ENV" ] && [ -f "$DIR/../venv.mitmproxy/bin/activate" ]; then
    echo "Activating mitmproxy virtualenv..."
    source "$DIR/../venv.mitmproxy/bin/activate"
fi

133
netlib/http_cookies.py Normal file
View File

@ -0,0 +1,133 @@
"""
A flexible module for cookie parsing and manipulation.
We try to be as permissive as possible. Parsing accepts formats from RFC6265 and
RFC2109. Serialization follows RFC6265 strictly.
http://tools.ietf.org/html/rfc6265
http://tools.ietf.org/html/rfc2109
"""
import re
import odict
def _read_until(s, start, term):
"""
Read until one of the characters in term is reached.
"""
if start == len(s):
return "", start+1
for i in range(start, len(s)):
if s[i] in term:
return s[start:i], i
return s[start:i+1], i+1
def _read_token(s, start):
    """
    Read the name part (left-hand side) of a cookie pair, beginning at offset
    start. Reading is delegated to _read_until with ";" and "=" as the
    terminating characters.
    """
    delimiters = ";="
    return _read_until(s, start, delimiters)
def _read_quoted_string(s, start):
"""
start: offset to the first quote of the string to be read
A sort of loose super-set of the various quoted string specifications.
RFC6265 disallows backslashes or double quotes within quoted strings.
Prior RFCs use backslashes to escape. This leaves us free to apply
backslash escaping by default and be compatible with everything.
"""
escaping = False
ret = []
# Skip the first quote
for i in range(start+1, len(s)):
if escaping:
ret.append(s[i])
escaping = False
elif s[i] == '"':
break
elif s[i] == "\\":
escaping = True
pass
else:
ret.append(s[i])
return "".join(ret), i+1
def _read_value(s, start):
"""
Reads a value - the RHS of a token/value pair in a cookie.
"""
if s[start] == '"':
return _read_quoted_string(s, start)
else:
return _read_until(s, start, ";,")
def _read_pairs(s):
    """
    Split s into a list of [name, value] pairs.

    A name that is not followed by "=" gets the value None. Leading
    whitespace is stripped from each name. Returns a (pairs, offset) tuple,
    where offset is the position at which reading stopped.
    """
    pairs = []
    pos = 0
    end = len(s)
    while True:
        name, pos = _read_token(s, pos)
        value = None
        if pos < end and s[pos] == "=":
            value, pos = _read_value(s, pos + 1)
        pairs.append([name.lstrip(), value])
        # Step over the delimiter that ended this pair.
        pos += 1
        if pos >= end:
            break
    return pairs, pos
ESCAPE = re.compile(r"([\"\\])")
SPECIAL = re.compile(r"^\w+$")
def _format_pairs(lst):
vals = []
for k, v in lst:
if v is None:
vals.append(k)
else:
match = SPECIAL.search(v)
if match:
v = ESCAPE.sub(r"\1", v)
vals.append("%s=%s"%(k, v))
return "; ".join(vals)
def parse_cookies(s):
    """
    Parse the value of a Cookie header.

    Returns an ODict built from the pairs that _read_pairs extracts from s.
    """
    parsed = _read_pairs(s)
    # _read_pairs also reports the final offset, which is not needed here.
    return odict.ODict(parsed[0])
def unparse_cookies(od):
    """
    Formats a Cookie header value.

    od: the object to serialize; its lst attribute is read as a sequence of
    [name, value] pairs.

    Serialization is delegated to _format_pairs so that it stays consistent
    with the rest of the module. In particular, a pair whose value is None
    is written as the bare name instead of the literal text "name=None".
    """
    return _format_pairs(od.lst)
def parse_set_cookies(s):
    """
    Unimplemented placeholder (presumably for parsing Set-Cookie header
    values - TODO confirm). The argument is ignored and None is returned.
    """
    offset = 0  # starting offset; not used yet
    return None
def unparse_set_cookies(s):
    """
    Unimplemented placeholder (presumably for formatting Set-Cookie header
    values - TODO confirm). The argument is ignored and None is returned.
    """
    return None

106
test/test_http_cookies.py Normal file
View File

@ -0,0 +1,106 @@
from netlib import http_cookies, odict
import nose.tools
def test_read_token():
    """
    Check the (token, offset) result of _read_token for a table of
    (string, start offset) inputs.
    """
    cases = (
        (("foo", 0), ("foo", 3)),
        (("foo", 1), ("oo", 3)),
        ((" foo", 1), ("foo", 4)),
        ((" foo;", 1), ("foo", 4)),
        ((" foo=", 1), ("foo", 4)),
        ((" foo=bar", 1), ("foo", 4)),
    )
    for args, expected in cases:
        result = http_cookies._read_token(*args)
        nose.tools.eq_(result, expected)
def test_read_quoted_string():
    """
    Check the (text, offset) result of _read_quoted_string for plain,
    backslash-escaped and escaped-quote inputs.
    """
    tokens = [
        [('"foo" x', 0), ("foo", 5)],
        # Raw string: "\o" is not a valid escape sequence in a normal
        # literal. The value is unchanged (a literal backslash followed
        # by "o").
        [(r'"f\oo" x', 0), ("foo", 6)],
        [(r'"f\\o" x', 0), (r"f\o", 6)],
        [(r'"f\\" x', 0), (r"f" + '\\', 5)],
        [('"fo\\\"" x', 0), ("fo\"", 6)],
    ]
    for q, a in tokens:
        nose.tools.eq_(http_cookies._read_quoted_string(*q), a)
def test_read_pairs():
    """
    Check the pairs returned by _read_pairs for bare names, plain values,
    quoted values and escaped quotes.
    """
    cases = (
        ("one", [["one", None]]),
        ("one=two", [["one", "two"]]),
        ('one="two"', [["one", "two"]]),
        (
            'one="two"; three=four',
            [["one", "two"], ["three", "four"]],
        ),
        (
            'one="two"; three=four; five',
            [["one", "two"], ["three", "four"], ["five", None]],
        ),
        (
            'one="\\"two"; three=four',
            [["one", '"two'], ["three", "four"]],
        ),
    )
    for header, expected in cases:
        parsed, _ = http_cookies._read_pairs(header)
        nose.tools.eq_(parsed, expected)
def test_pairs_roundtrips():
    """
    Check that each string parses to the expected pairs, and that formatting
    those pairs and parsing the result yields the same pairs again.
    """
    pairs = [
        [
            "one=uno",
            [["one", "uno"]]
        ],
        [
            "one",
            [["one", None]]
        ],
        [
            "one=uno; two=due",
            [["one", "uno"], ["two", "due"]]
        ],
        [
            # Raw string: "\d" is not a valid escape sequence in a normal
            # literal. The value is unchanged (a literal backslash followed
            # by "d").
            r'one="uno"; two="\due"',
            [["one", "uno"], ["two", "due"]]
        ],
        [
            'one="un\\"o"',
            [["one", 'un"o']]
        ],
        [
            "one=uno; two; three=tre",
            [["one", "uno"], ["two", None], ["three", "tre"]]
        ],
        [
            "_lvs2=zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g=; "
            "_rcc2=53VdltWl+Ov6ordflA==;",
            [
                ["_lvs2", "zHai1+Hq+Tc2vmc2r4GAbdOI5Jopg3EwsdUT9g="],
                ["_rcc2", "53VdltWl+Ov6ordflA=="]
            ]
        ]
    ]
    for s, lst in pairs:
        # Parsing the original string gives the expected pairs...
        ret, off = http_cookies._read_pairs(s)
        nose.tools.eq_(ret, lst)
        # ...and so does parsing the re-serialized form.
        s2 = http_cookies._format_pairs(lst)
        ret, off = http_cookies._read_pairs(s2)
        nose.tools.eq_(ret, lst)
def test_parse_set_cookie():
    """
    Placeholder test: nothing is exercised yet.
    """
    return None