mirror of https://github.com/Grasscutters/mitmproxy.git
synced 2024-11-26 02:10:59 +00:00
commit 2419ab153d

examples/har_dump.py (new file, 216 lines)
@@ -0,0 +1,216 @@
"""
This inline script can be used to dump flows as HAR files.
"""


import pprint
import json
import sys
import base64
import zlib

from datetime import datetime
import pytz

import mitmproxy

from netlib import version
from netlib import strutils
from netlib.http import cookies

HAR = {}

# A list of server seen till now is maintained so we can avoid
# using 'connect' time for entries that use an existing connection.
SERVERS_SEEN = set()


def start():
    """
        Called once on script startup before any other events.
    """
    if len(sys.argv) != 2:
        raise ValueError(
            'Usage: -s "har_dump.py filename" '
            '(- will output to stdout, filenames ending with .zhar '
            'will result in compressed har)'
        )

    HAR.update({
        "log": {
            "version": "1.2",
            "creator": {
                "name": "mitmproxy har_dump",
                "version": "0.1",
                "comment": "mitmproxy version %s" % version.MITMPROXY
            },
            "entries": []
        }
    })


def response(flow):
    """
        Called when a server response has been received.
    """

    # -1 indicates that these values do not apply to the current request
    ssl_time = -1
    connect_time = -1

    if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)

        if flow.server_conn.timestamp_ssl_setup is not None:
            ssl_time = (flow.server_conn.timestamp_ssl_setup -
                        flow.server_conn.timestamp_tcp_setup)

        SERVERS_SEEN.add(flow.server_conn)

    # Calculate raw timings from timestamps. DNS timings can not be calculated
    # for lack of a way to measure it. The same goes for HAR blocked.
    # mitmproxy will open a server connection as soon as it receives the host
    # and port from the client connection. So, the time spent waiting is actually
    # spent waiting between request.timestamp_end and response.timestamp_start,
    # thus it correlates to HAR wait instead.
    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
        'connect': connect_time,
        'ssl': ssl_time,
    }

    # HAR timings are integers in ms, so we re-encode the raw timings to that format.
    timings = dict([(k, int(1000 * v)) for k, v in timings_raw.items()])

    # full_time is the sum of all timings.
    # Timings set to -1 will be ignored as per spec.
    full_time = sum(v for v in timings.values() if v > -1)

    started_date_time = format_datetime(datetime.utcfromtimestamp(flow.request.timestamp_start))

    # Response body size and encoding
    response_body_size = len(flow.response.raw_content)
    response_body_decoded_size = len(flow.response.content)
    response_body_compression = response_body_decoded_size - response_body_size

    entry = {
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": {
            "method": flow.request.method,
            "url": flow.request.url,
            "httpVersion": flow.request.http_version,
            "cookies": format_request_cookies(flow.request.cookies.fields),
            "headers": name_value(flow.request.headers),
            "queryString": name_value(flow.request.query or {}),
            "headersSize": len(str(flow.request.headers)),
            "bodySize": len(flow.request.content),
        },
        "response": {
            "status": flow.response.status_code,
            "statusText": flow.response.reason,
            "httpVersion": flow.response.http_version,
            "cookies": format_response_cookies(flow.response.cookies.fields),
            "headers": name_value(flow.response.headers),
            "content": {
                "size": response_body_size,
                "compression": response_body_compression,
                "mimeType": flow.response.headers.get('Content-Type', '')
            },
            "redirectURL": flow.response.headers.get('Location', ''),
            "headersSize": len(str(flow.response.headers)),
            "bodySize": response_body_size,
        },
        "cache": {},
        "timings": timings,
    }

    # Store binary data as base64
    if strutils.is_mostly_bin(flow.response.content):
        b64 = base64.b64encode(flow.response.content)
        entry["response"]["content"]["text"] = b64.decode('ascii')
        entry["response"]["content"]["encoding"] = "base64"
    else:
        entry["response"]["content"]["text"] = flow.response.text

    if flow.request.method in ["POST", "PUT", "PATCH"]:
        entry["request"]["postData"] = {
            "mimeType": flow.request.headers.get("Content-Type", "").split(";")[0],
            "text": flow.request.content,
            "params": name_value(flow.request.urlencoded_form)
        }

    if flow.server_conn:
        entry["serverIPAddress"] = str(flow.server_conn.ip_address.address[0])

    HAR["log"]["entries"].append(entry)


def done():
    """
        Called once on script shutdown, after any other events.
    """
    dump_file = sys.argv[1]

    if dump_file == '-':
        mitmproxy.ctx.log(pprint.pformat(HAR))
    else:
        json_dump = json.dumps(HAR, indent=2)

        if dump_file.endswith('.zhar'):
            json_dump = zlib.compress(json_dump, 9)

        with open(dump_file, "w") as f:
            f.write(json_dump)

        mitmproxy.ctx.log("HAR dump finished (wrote %s bytes to file)" % len(json_dump))


def format_datetime(dt):
    return dt.replace(tzinfo=pytz.timezone("UTC")).isoformat()


def format_cookies(cookie_list):
    rv = []

    for name, value, attrs in cookie_list:
        cookie_har = {
            "name": name,
            "value": value,
        }

        # HAR only needs some attributes
        for key in ["path", "domain", "comment"]:
            if key in attrs:
                cookie_har[key] = attrs[key]

        # These keys need to be boolean!
        for key in ["httpOnly", "secure"]:
            cookie_har[key] = bool(key in attrs)

        # Expiration time needs to be formatted
        expire_ts = cookies.get_expiration_ts(attrs)
        if expire_ts is not None:
            cookie_har["expires"] = format_datetime(datetime.fromtimestamp(expire_ts))

        rv.append(cookie_har)

    return rv


def format_request_cookies(fields):
    return format_cookies(cookies.group_cookies(fields))


def format_response_cookies(fields):
    return format_cookies((c[0], c[1].value, c[1].attrs) for c in fields)


def name_value(obj):
    """
        Convert (key, value) pairs to HAR format.
    """
    return [{"name": k, "value": v} for k, v in obj.items()]
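A minimal sketch (not part of the diff) of how the resulting dump can be sanity-checked, assuming the script was loaded with -s "har_dump.py dump.har" and the hypothetical output file dump.har exists:

    import json

    with open("dump.har", "r") as f:
        har = json.load(f)

    # The script writes a single "log" object with creator metadata and
    # one entry per completed request/response pair, as built in response() above.
    print(har["log"]["creator"]["name"])      # "mitmproxy har_dump"
    for entry in har["log"]["entries"]:
        print(entry["request"]["method"], entry["request"]["url"], entry["time"])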
examples/har_extractor.py (deleted)

@@ -1,264 +0,0 @@
"""
    This inline script utilizes harparser.HAR from
    https://github.com/JustusW/harparser to generate a HAR log object.
"""
import mitmproxy.ctx
import six
import sys
import pytz
from harparser import HAR

from datetime import datetime


class _HARLog(HAR.log):
    # The attributes need to be registered here for them to actually be
    # available later via self. This is due to HAREncodable linking __getattr__
    # to __getitem__. Anything that is set only in __init__ will just be added
    # as key/value pair to self.__classes__.
    __page_list__ = []
    __page_count__ = 0
    __page_ref__ = {}

    def __init__(self, page_list=[]):
        self.__page_list__ = page_list
        self.__page_count__ = 0
        self.__page_ref__ = {}

        HAR.log.__init__(self, {"version": "1.2",
                                "creator": {"name": "MITMPROXY HARExtractor",
                                            "version": "0.1",
                                            "comment": ""},
                                "pages": [],
                                "entries": []})

    def reset(self):
        self.__init__(self.__page_list__)

    def add(self, obj):
        if isinstance(obj, HAR.pages):
            self['pages'].append(obj)
        if isinstance(obj, HAR.entries):
            self['entries'].append(obj)

    def create_page_id(self):
        self.__page_count__ += 1
        return "autopage_%s" % str(self.__page_count__)

    def set_page_ref(self, page, ref):
        self.__page_ref__[page] = ref

    def get_page_ref(self, page):
        return self.__page_ref__.get(page, None)

    def get_page_list(self):
        return self.__page_list__


class Context(object):
    pass

context = Context()


def start():
    """
        On start we create a HARLog instance. You will have to adapt this to
        suit your actual needs of HAR generation. As it will probably be
        necessary to cluster logs by IPs or reset them from time to time.
    """
    if sys.version_info >= (3, 0):
        raise RuntimeError(
            "har_extractor.py does not work on Python 3. "
            "Please check out https://github.com/mitmproxy/mitmproxy/issues/1320 "
            "if you want to help making this work again."
        )
    context.dump_file = None
    if len(sys.argv) > 1:
        context.dump_file = sys.argv[1]
    else:
        raise ValueError(
            'Usage: -s "har_extractor.py filename" '
            '(- will output to stdout, filenames ending with .zhar '
            'will result in compressed har)'
        )
    context.HARLog = _HARLog()
    context.seen_server = set()


def response(flow):
    """
        Called when a server response has been received. At the time of this
        message both a request and a response are present and completely done.
    """
    # Values are converted from float seconds to int milliseconds later.
    ssl_time = -.001
    connect_time = -.001
    if flow.server_conn not in context.seen_server:
        # Calculate the connect_time for this server_conn. Afterwards add it to
        # seen list, in order to avoid the connect_time being present in entries
        # that use an existing connection.
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)
        context.seen_server.add(flow.server_conn)

        if flow.server_conn.timestamp_ssl_setup is not None:
            # Get the ssl_time for this server_conn as the difference between
            # the start of the successful tcp setup and the successful ssl
            # setup. If no ssl setup has been made it is left as -1 since it
            # doesn't apply to this connection.
            ssl_time = (flow.server_conn.timestamp_ssl_setup -
                        flow.server_conn.timestamp_tcp_setup)

    # Calculate the raw timings from the different timestamps present in the
    # request and response object. For lack of a way to measure it dns timings
    # can not be calculated. The same goes for HAR blocked: MITMProxy will open
    # a server connection as soon as it receives the host and port from the
    # client connection. So the time spent waiting is actually spent waiting
    # between request.timestamp_end and response.timestamp_start thus it
    # correlates to HAR wait instead.
    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'connect': connect_time,
        'ssl': ssl_time
    }

    # HAR timings are integers in ms, so we have to re-encode the raw timings to
    # that format.
    timings = dict([(k, int(1000 * v)) for k, v in six.iteritems(timings_raw)])

    # The full_time is the sum of all timings.
    # Timings set to -1 will be ignored as per spec.
    full_time = sum(v for v in timings.values() if v > -1)

    started_date_time = datetime.utcfromtimestamp(
        flow.request.timestamp_start).replace(tzinfo=pytz.timezone("UTC")).isoformat()

    request_query_string = [{"name": k, "value": v}
                            for k, v in flow.request.query or {}]

    response_body_size = len(flow.response.content)
    response_body_decoded_size = len(flow.response.content)
    response_body_compression = response_body_decoded_size - response_body_size

    entry = HAR.entries({
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": {
            "method": flow.request.method,
            "url": flow.request.url,
            "httpVersion": flow.request.http_version,
            "cookies": format_cookies(flow.request.cookies),
            "headers": format_headers(flow.request.headers),
            "queryString": request_query_string,
            "headersSize": len(str(flow.request.headers)),
            "bodySize": len(flow.request.content),
        },
        "response": {
            "status": flow.response.status_code,
            "statusText": flow.response.reason,
            "httpVersion": flow.response.http_version,
            "cookies": format_cookies(flow.response.cookies),
            "headers": format_headers(flow.response.headers),
            "content": {
                "size": response_body_size,
                "compression": response_body_compression,
                "mimeType": flow.response.headers.get('Content-Type', '')
            },
            "redirectURL": flow.response.headers.get('Location', ''),
            "headersSize": len(str(flow.response.headers)),
            "bodySize": response_body_size,
        },
        "cache": {},
        "timings": timings,
    })

    # If the current url is in the page list of context.HARLog or
    # does not have a referrer, we add it as a new pages object.
    is_new_page = (
        flow.request.url in context.HARLog.get_page_list() or
        flow.request.headers.get('Referer') is None
    )
    if is_new_page:
        page_id = context.HARLog.create_page_id()
        context.HARLog.add(
            HAR.pages({
                "startedDateTime": entry['startedDateTime'],
                "id": page_id,
                "title": flow.request.url,
                "pageTimings": {}
            })
        )
        context.HARLog.set_page_ref(flow.request.url, page_id)
        entry['pageref'] = page_id

    # Lookup the referer in the page_ref of context.HARLog to point this entries
    # pageref attribute to the right pages object, then set it as a new
    # reference to build a reference tree.
    elif context.HARLog.get_page_ref(flow.request.headers.get('Referer')) is not None:
        entry['pageref'] = context.HARLog.get_page_ref(
            flow.request.headers['Referer']
        )
        context.HARLog.set_page_ref(
            flow.request.headers['Referer'], entry['pageref']
        )

    context.HARLog.add(entry)


def done():
    """
        Called once on script shutdown, after any other events.
    """
    import pprint
    import json

    json_dump = context.HARLog.json()
    compressed_json_dump = context.HARLog.compress()

    if context.dump_file == '-':
        mitmproxy.ctx.log(pprint.pformat(json.loads(json_dump)))
    elif context.dump_file.endswith('.zhar'):
        with open(context.dump_file, "wb") as f:
            f.write(compressed_json_dump)
    else:
        with open(context.dump_file, "wb") as f:
            f.write(json_dump)
    mitmproxy.ctx.log(
        "HAR log finished with %s bytes (%s bytes compressed)" % (
            len(json_dump), len(compressed_json_dump)
        )
    )
    mitmproxy.ctx.log(
        "Compression rate is %s%%" % str(
            100. * len(compressed_json_dump) / len(json_dump)
        )
    )


def format_cookies(obj):
    if obj:
        return [{"name": k.strip(), "value": v[0]} for k, v in obj.items()]
    return ""


def format_headers(obj):
    if obj:
        return [{"name": k, "value": v} for k, v in obj.fields]
    return ""


def print_attributes(obj, filter_string=None, hide_privates=False):
    """
        Useful helper method to quickly get all attributes of an object and its
        values.
    """
    for attr in dir(obj):
        if hide_privates and "__" in attr:
            continue
        if filter_string is not None and filter_string not in attr:
            continue
        value = getattr(obj, attr)
        print("%s.%s" % ('obj', attr), value, type(value))
netlib.http.cookies:

@@ -26,6 +26,12 @@ variants. Serialization follows RFC6265.
     http://tools.ietf.org/html/rfc2965
 """

+_cookie_params = set((
+    'expires', 'path', 'comment', 'max-age',
+    'secure', 'httponly', 'version',
+))
+
+
 # TODO: Disallow LHS-only Cookie values

@@ -263,6 +269,32 @@ def refresh_set_cookie_header(c, delta):
     return ret


+def get_expiration_ts(cookie_attrs):
+    """
+        Determines the time when the cookie will be expired.
+
+        Considering both 'expires' and 'max-age' parameters.
+
+        Returns: timestamp of when the cookie will expire.
+            None, if no expiration time is set.
+    """
+    if 'expires' in cookie_attrs:
+        e = email.utils.parsedate_tz(cookie_attrs["expires"])
+        if e:
+            return email.utils.mktime_tz(e)
+
+    elif 'max-age' in cookie_attrs:
+        try:
+            max_age = int(cookie_attrs['Max-Age'])
+        except ValueError:
+            pass
+        else:
+            now_ts = time.time()
+            return now_ts + max_age
+
+    return None
+
+
 def is_expired(cookie_attrs):
     """
         Determines whether a cookie has expired.

@@ -270,20 +302,36 @@ def is_expired(cookie_attrs):
         Returns: boolean
     """

-    # See if 'expires' time is in the past
-    expires = False
-    if 'expires' in cookie_attrs:
-        e = email.utils.parsedate_tz(cookie_attrs["expires"])
-        if e:
-            exp_ts = email.utils.mktime_tz(e)
-            now_ts = time.time()
-            expires = exp_ts < now_ts
-
-    # or if Max-Age is 0
-    max_age = False
-    try:
-        max_age = int(cookie_attrs.get('Max-Age', 1)) == 0
-    except ValueError:
-        pass
-
-    return expires or max_age
+    exp_ts = get_expiration_ts(cookie_attrs)
+    now_ts = time.time()
+
+    # If no expiration information was provided with the cookie
+    if exp_ts is None:
+        return False
+    else:
+        return exp_ts <= now_ts
+
+
+def group_cookies(pairs):
+    """
+        Converts a list of pairs to a (name, value, attrs) for each cookie.
+    """
+
+    if not pairs:
+        return []
+
+    cookie_list = []
+
+    # First pair is always a new cookie
+    name, value = pairs[0]
+    attrs = []
+
+    for k, v in pairs[1:]:
+        if k.lower() in _cookie_params:
+            attrs.append((k, v))
+        else:
+            cookie_list.append((name, value, CookieAttrs(attrs)))
+            name, value, attrs = k, v, []
+
+    cookie_list.append((name, value, CookieAttrs(attrs)))
+
+    return cookie_list
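A minimal sketch (not part of the diff) of how the new helpers behave, mirroring the tests further below; CookieAttrs and parse_cookie_header already exist in netlib.http.cookies:

    from netlib.http import cookies

    CA = cookies.CookieAttrs

    # 'expires' and 'max-age' are both honoured when computing the expiration timestamp.
    assert cookies.get_expiration_ts(CA([("Expires", "Thu, 01-Jan-1970 00:00:00 GMT")])) == 0
    assert cookies.get_expiration_ts(CA([("Max-Age", "31")])) is not None

    # is_expired() now defers to get_expiration_ts() instead of re-parsing the attributes.
    assert cookies.is_expired(CA([("Max-Age", "0")]))
    assert not cookies.is_expired(CA([("Max-Age", "nan")]))  # unparsable ages are ignored

    # group_cookies() turns a flat (key, value) list into (name, value, attrs) triples.
    pairs = cookies.parse_cookie_header("one=uno; Path=/; foo=bar")
    assert cookies.group_cookies(pairs) == [
        ("one", "uno", CA([("Path", "/")])),
        ("foo", "bar", CA([])),
    ]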
setup.py (1 line removed)

@@ -119,7 +119,6 @@ setup(
         ],
         'examples': [
             "beautifulsoup4>=4.4.1, <4.6",
-            "harparser>=0.2, <0.3",
             "pytz>=2015.07.0, <=2016.6.1",
         ]
     }
tests for the example scripts:

@@ -1,16 +1,20 @@
 import json
+import os
+
 import six
-import sys
-import os.path
-
-from mitmproxy.flow import master
-from mitmproxy.flow import state
+
 from mitmproxy import options
 from mitmproxy import contentviews
 from mitmproxy.builtins import script
+from mitmproxy.flow import master
+from mitmproxy.flow import state

 import netlib.utils

 from netlib import tutils as netutils
 from netlib.http import Headers
+from netlib.http import cookies

 from . import tutils, mastertest

 example_dir = netlib.utils.Data(__name__).push("../../examples")

@@ -98,30 +102,66 @@ class TestScripts(mastertest.MasterTest):
         m.request(f)
         assert f.request.host == "mitmproxy.org"

-    def test_har_extractor(self):
-        if sys.version_info >= (3, 0):
-            with tutils.raises("does not work on Python 3"):
-                tscript("har_extractor.py")
-            return
-
-        with tutils.raises(ScriptError):
-            tscript("har_extractor.py")
-
-        with tutils.tmpdir() as tdir:
-            times = dict(
-                timestamp_start=746203272,
-                timestamp_end=746203272,
-            )
-
-            path = os.path.join(tdir, "file")
-            m, sc = tscript("har_extractor.py", six.moves.shlex_quote(path))
-            f = tutils.tflow(
-                req=netutils.treq(**times),
-                resp=netutils.tresp(**times)
-            )
-            m.response(f)
-            m.addons.remove(sc)
-
-            with open(path, "rb") as f:
-                test_data = json.load(f)
-                assert len(test_data["log"]["pages"]) == 1
+
+class TestHARDump():
+
+    def flow(self, resp_content=b'message'):
+        times = dict(
+            timestamp_start=746203272,
+            timestamp_end=746203272,
+        )
+
+        # Create a dummy flow for testing
+        return tutils.tflow(
+            req=netutils.treq(method=b'GET', **times),
+            resp=netutils.tresp(content=resp_content, **times)
+        )
+
+    def test_no_file_arg(self):
+        with tutils.raises(ScriptError):
+            tscript("har_dump.py")
+
+    def test_simple(self):
+        with tutils.tmpdir() as tdir:
+            path = os.path.join(tdir, "somefile")
+
+            m, sc = tscript("har_dump.py", six.moves.shlex_quote(path))
+            m.addons.invoke(m, "response", self.flow())
+            m.addons.remove(sc)
+
+            with open(path, "r") as inp:
+                har = json.load(inp)
+
+            assert len(har["log"]["entries"]) == 1
+
+    def test_base64(self):
+        with tutils.tmpdir() as tdir:
+            path = os.path.join(tdir, "somefile")
+
+            m, sc = tscript("har_dump.py", six.moves.shlex_quote(path))
+            m.addons.invoke(m, "response", self.flow(resp_content=b"foo" + b"\xFF" * 10))
+            m.addons.remove(sc)
+
+            with open(path, "r") as inp:
+                har = json.load(inp)
+
+            assert har["log"]["entries"][0]["response"]["content"]["encoding"] == "base64"
+
+    def test_format_cookies(self):
+        m, sc = tscript("har_dump.py", "-")
+        format_cookies = sc.ns.ns["format_cookies"]
+
+        CA = cookies.CookieAttrs
+
+        f = format_cookies([("n", "v", CA([("k", "v")]))])[0]
+        assert f['name'] == "n"
+        assert f['value'] == "v"
+        assert not f['httpOnly']
+        assert not f['secure']
+
+        f = format_cookies([("n", "v", CA([("httponly", None), ("secure", None)]))])[0]
+        assert f['httpOnly']
+        assert f['secure']
+
+        f = format_cookies([("n", "v", CA([("expires", "Mon, 24-Aug-2037 00:00:00 GMT")]))])[0]
+        assert f['expires']
tests for netlib.http.cookies:

@@ -1,6 +1,10 @@
+import time
+
 from netlib.http import cookies
 from netlib.tutils import raises

+import mock
+

 def test_read_token():
     tokens = [

@@ -247,6 +251,22 @@ def test_refresh_cookie():
     assert cookies.refresh_set_cookie_header(c, 0)


+@mock.patch('time.time')
+def test_get_expiration_ts(*args):
+    # Freeze time
+    now_ts = 17
+    time.time.return_value = now_ts
+
+    CA = cookies.CookieAttrs
+    F = cookies.get_expiration_ts
+
+    assert F(CA([("Expires", "Thu, 01-Jan-1970 00:00:00 GMT")])) == 0
+    assert F(CA([("Expires", "Mon, 24-Aug-2037 00:00:00 GMT")])) == 2134684800
+
+    assert F(CA([("Max-Age", "0")])) == now_ts
+    assert F(CA([("Max-Age", "31")])) == now_ts + 31
+
+
 def test_is_expired():
     CA = cookies.CookieAttrs

@@ -260,9 +280,53 @@ def test_is_expired():
     # or both
     assert cookies.is_expired(CA([("Expires", "Thu, 01-Jan-1970 00:00:00 GMT"), ("Max-Age", "0")]))

-    assert not cookies.is_expired(CA([("Expires", "Thu, 24-Aug-2063 00:00:00 GMT")]))
+    assert not cookies.is_expired(CA([("Expires", "Mon, 24-Aug-2037 00:00:00 GMT")]))
     assert not cookies.is_expired(CA([("Max-Age", "1")]))
-    assert not cookies.is_expired(CA([("Expires", "Thu, 15-Jul-2068 00:00:00 GMT"), ("Max-Age", "1")]))
+    assert not cookies.is_expired(CA([("Expires", "Wed, 15-Jul-2037 00:00:00 GMT"), ("Max-Age", "1")]))

     assert not cookies.is_expired(CA([("Max-Age", "nan")]))
     assert not cookies.is_expired(CA([("Expires", "false")]))
+
+
+def test_group_cookies():
+    CA = cookies.CookieAttrs
+    groups = [
+        [
+            "one=uno; foo=bar; foo=baz",
+            [
+                ('one', 'uno', CA([])),
+                ('foo', 'bar', CA([])),
+                ('foo', 'baz', CA([]))
+            ]
+        ],
+        [
+            "one=uno; Path=/; foo=bar; Max-Age=0; foo=baz; expires=24-08-1993",
+            [
+                ('one', 'uno', CA([('Path', '/')])),
+                ('foo', 'bar', CA([('Max-Age', '0')])),
+                ('foo', 'baz', CA([('expires', '24-08-1993')]))
+            ]
+        ],
+        [
+            "one=uno;",
+            [
+                ('one', 'uno', CA([]))
+            ]
+        ],
+        [
+            "one=uno; Path=/; Max-Age=0; Expires=24-08-1993",
+            [
+                ('one', 'uno', CA([('Path', '/'), ('Max-Age', '0'), ('Expires', '24-08-1993')]))
+            ]
+        ],
+        [
+            "path=val; Path=/",
+            [
+                ('path', 'val', CA([('Path', '/')]))
+            ]
+        ]
+    ]
+
+    for c, expected in groups:
+        observed = cookies.group_cookies(cookies.parse_cookie_header(c))
+        assert observed == expected