"""
This inline script can be used to dump flows as HAR files.
"""
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
2016-08-10 07:53:19 +00:00
|
|
|
import base64
|
2016-08-10 08:11:48 +00:00
|
|
|
import zlib
|
2017-02-05 13:29:01 +00:00
|
|
|
import os
|
2018-02-03 20:37:33 +00:00
|
|
|
import typing # noqa
|
2016-08-02 12:20:14 +00:00
|
|
|
|
|
|
|
from datetime import datetime
|
2017-02-25 17:37:47 +00:00
|
|
|
from datetime import timezone
|
2016-08-02 12:20:14 +00:00
|
|
|
|
|
|
|
import mitmproxy
|
|
|
|
|
2018-02-03 20:37:33 +00:00
|
|
|
from mitmproxy import connections # noqa
|
2016-10-19 20:20:44 +00:00
|
|
|
from mitmproxy import version
|
2017-04-25 23:45:15 +00:00
|
|
|
from mitmproxy import ctx
|
2016-10-19 21:11:58 +00:00
|
|
|
from mitmproxy.utils import strutils
|
2016-10-19 22:56:38 +00:00
|
|
|
from mitmproxy.net.http import cookies
|
2016-08-04 09:08:14 +00:00
|
|
|
|
2018-02-03 20:37:33 +00:00
|
|
|
HAR = {} # type: typing.Dict
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2016-08-09 05:35:16 +00:00
|
|
|
# A list of server seen till now is maintained so we can avoid
|
|
|
|
# using 'connect' time for entries that use an existing connection.
|
2018-02-03 20:37:33 +00:00
|
|
|
SERVERS_SEEN = set() # type: typing.Set[connections.ServerConnection]
|
2016-08-09 05:35:16 +00:00
|
|
|
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2017-03-23 22:29:36 +00:00
|
|
|
def load(l):
|
2017-04-25 23:45:15 +00:00
|
|
|
l.add_option(
|
|
|
|
"hardump", str, "", "HAR dump path.",
|
|
|
|
)
|
|
|
|
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2017-04-25 23:45:15 +00:00
|
|
|
def configure(updated):
|
2016-08-02 12:20:14 +00:00
|
|
|
HAR.update({
|
|
|
|
"log": {
|
|
|
|
"version": "1.2",
|
|
|
|
"creator": {
|
|
|
|
"name": "mitmproxy har_dump",
|
|
|
|
"version": "0.1",
|
|
|
|
"comment": "mitmproxy version %s" % version.MITMPROXY
|
|
|
|
},
|
|
|
|
"entries": []
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
def response(flow):
|
|
|
|
"""
|
|
|
|
Called when a server response has been received.
|
|
|
|
"""
|
|
|
|
|
2016-08-09 05:35:16 +00:00
|
|
|
# -1 indicates that these values do not apply to current request
|
|
|
|
ssl_time = -1
|
|
|
|
connect_time = -1
|
|
|
|
|
|
|
|
if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
|
|
|
|
connect_time = (flow.server_conn.timestamp_tcp_setup -
|
|
|
|
flow.server_conn.timestamp_start)
|
|
|
|
|
2018-01-05 21:46:23 +00:00
|
|
|
if flow.server_conn.timestamp_tls_setup is not None:
|
|
|
|
ssl_time = (flow.server_conn.timestamp_tls_setup -
|
2016-08-09 05:35:16 +00:00
|
|
|
flow.server_conn.timestamp_tcp_setup)
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2016-08-09 05:35:16 +00:00
|
|
|
SERVERS_SEEN.add(flow.server_conn)
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2016-08-09 05:35:16 +00:00
|
|
|
# Calculate raw timings from timestamps. DNS timings can not be calculated
|
|
|
|
# for lack of a way to measure it. The same goes for HAR blocked.
|
2016-08-02 12:20:14 +00:00
|
|
|
# mitmproxy will open a server connection as soon as it receives the host
|
|
|
|
# and port from the client connection. So, the time spent waiting is actually
|
2016-08-09 05:35:16 +00:00
|
|
|
# spent waiting between request.timestamp_end and response.timestamp_start
|
|
|
|
# thus it correlates to HAR wait instead.
|
2016-08-02 12:20:14 +00:00
|
|
|
timings_raw = {
|
|
|
|
'send': flow.request.timestamp_end - flow.request.timestamp_start,
|
|
|
|
'receive': flow.response.timestamp_end - flow.response.timestamp_start,
|
|
|
|
'wait': flow.response.timestamp_start - flow.request.timestamp_end,
|
2016-08-09 05:35:16 +00:00
|
|
|
'connect': connect_time,
|
|
|
|
'ssl': ssl_time,
|
2016-08-02 12:20:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
# HAR timings are integers in ms, so we re-encode the raw timings to that format.
|
|
|
|
timings = dict([(k, int(1000 * v)) for k, v in timings_raw.items()])
|
|
|
|
|
|
|
|
# full_time is the sum of all timings.
|
|
|
|
# Timings set to -1 will be ignored as per spec.
|
|
|
|
full_time = sum(v for v in timings.values() if v > -1)
|
|
|
|
|
2017-02-25 17:37:47 +00:00
|
|
|
started_date_time = datetime.fromtimestamp(flow.request.timestamp_start, timezone.utc).isoformat()
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2016-08-10 07:53:19 +00:00
|
|
|
# Response body size and encoding
|
|
|
|
response_body_size = len(flow.response.raw_content)
|
2016-08-02 12:20:14 +00:00
|
|
|
response_body_decoded_size = len(flow.response.content)
|
|
|
|
response_body_compression = response_body_decoded_size - response_body_size
|
|
|
|
|
2016-08-09 06:13:29 +00:00
|
|
|
entry = {
|
2016-08-02 12:20:14 +00:00
|
|
|
"startedDateTime": started_date_time,
|
|
|
|
"time": full_time,
|
|
|
|
"request": {
|
|
|
|
"method": flow.request.method,
|
|
|
|
"url": flow.request.url,
|
|
|
|
"httpVersion": flow.request.http_version,
|
2016-08-04 09:08:14 +00:00
|
|
|
"cookies": format_request_cookies(flow.request.cookies.fields),
|
2016-08-02 12:20:14 +00:00
|
|
|
"headers": name_value(flow.request.headers),
|
|
|
|
"queryString": name_value(flow.request.query or {}),
|
|
|
|
"headersSize": len(str(flow.request.headers)),
|
|
|
|
"bodySize": len(flow.request.content),
|
|
|
|
},
|
|
|
|
"response": {
|
|
|
|
"status": flow.response.status_code,
|
|
|
|
"statusText": flow.response.reason,
|
|
|
|
"httpVersion": flow.response.http_version,
|
2016-08-04 09:08:14 +00:00
|
|
|
"cookies": format_response_cookies(flow.response.cookies.fields),
|
2016-08-02 12:20:14 +00:00
|
|
|
"headers": name_value(flow.response.headers),
|
|
|
|
"content": {
|
|
|
|
"size": response_body_size,
|
|
|
|
"compression": response_body_compression,
|
|
|
|
"mimeType": flow.response.headers.get('Content-Type', '')
|
|
|
|
},
|
|
|
|
"redirectURL": flow.response.headers.get('Location', ''),
|
|
|
|
"headersSize": len(str(flow.response.headers)),
|
|
|
|
"bodySize": response_body_size,
|
|
|
|
},
|
|
|
|
"cache": {},
|
|
|
|
"timings": timings,
|
2016-08-09 06:13:29 +00:00
|
|
|
}
|
|
|
|
|
2016-10-23 01:47:12 +00:00
|
|
|
# Store binary data as base64
|
2016-08-10 07:53:19 +00:00
|
|
|
if strutils.is_mostly_bin(flow.response.content):
|
2016-10-23 01:47:12 +00:00
|
|
|
entry["response"]["content"]["text"] = base64.b64encode(flow.response.content).decode()
|
2016-08-10 07:53:19 +00:00
|
|
|
entry["response"]["content"]["encoding"] = "base64"
|
|
|
|
else:
|
2016-10-23 01:47:12 +00:00
|
|
|
entry["response"]["content"]["text"] = flow.response.get_text(strict=False)
|
2016-08-10 07:53:19 +00:00
|
|
|
|
2016-08-11 20:01:33 +00:00
|
|
|
if flow.request.method in ["POST", "PUT", "PATCH"]:
|
2016-10-23 01:47:12 +00:00
|
|
|
params = [
|
2016-12-19 00:15:10 +00:00
|
|
|
{"name": a, "value": b}
|
2016-10-23 01:47:12 +00:00
|
|
|
for a, b in flow.request.urlencoded_form.items(multi=True)
|
|
|
|
]
|
2016-08-09 06:13:29 +00:00
|
|
|
entry["request"]["postData"] = {
|
2016-10-24 21:34:04 +00:00
|
|
|
"mimeType": flow.request.headers.get("Content-Type", ""),
|
2016-10-23 01:47:12 +00:00
|
|
|
"text": flow.request.get_text(strict=False),
|
|
|
|
"params": params
|
2016-08-09 06:13:29 +00:00
|
|
|
}
|
|
|
|
|
2016-11-03 21:54:04 +00:00
|
|
|
if flow.server_conn.connected():
|
2017-03-08 15:18:34 +00:00
|
|
|
entry["serverIPAddress"] = str(flow.server_conn.ip_address[0])
|
2016-08-09 18:13:47 +00:00
|
|
|
|
2016-08-09 06:13:29 +00:00
|
|
|
HAR["log"]["entries"].append(entry)
|
2016-08-02 12:20:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
def done():
|
|
|
|
"""
|
|
|
|
Called once on script shutdown, after any other events.
|
|
|
|
"""
|
2017-04-25 23:45:15 +00:00
|
|
|
if ctx.options.hardump:
|
|
|
|
json_dump = json.dumps(HAR, indent=2) # type: str
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2017-04-25 23:45:15 +00:00
|
|
|
if ctx.options.hardump == '-':
|
|
|
|
mitmproxy.ctx.log(json_dump)
|
|
|
|
else:
|
|
|
|
raw = json_dump.encode() # type: bytes
|
|
|
|
if ctx.options.hardump.endswith('.zhar'):
|
|
|
|
raw = zlib.compress(raw, 9)
|
2016-08-10 08:11:48 +00:00
|
|
|
|
2017-04-25 23:45:15 +00:00
|
|
|
with open(os.path.expanduser(ctx.options.hardump), "wb") as f:
|
|
|
|
f.write(raw)
|
2016-08-02 12:20:14 +00:00
|
|
|
|
2017-04-25 23:45:15 +00:00
|
|
|
mitmproxy.ctx.log("HAR dump finished (wrote %s bytes to file)" % len(json_dump))
|
2016-08-02 12:20:14 +00:00
|
|
|
|
|
|
|
|
2016-08-11 10:22:46 +00:00
|
|
|
def format_cookies(cookie_list):
|
|
|
|
rv = []
|
2016-08-04 09:08:14 +00:00
|
|
|
|
2016-08-11 10:22:46 +00:00
|
|
|
for name, value, attrs in cookie_list:
|
2016-08-04 09:08:14 +00:00
|
|
|
cookie_har = {
|
|
|
|
"name": name,
|
|
|
|
"value": value,
|
|
|
|
}
|
2016-08-08 08:26:12 +00:00
|
|
|
|
|
|
|
# HAR only needs some attributes
|
|
|
|
for key in ["path", "domain", "comment"]:
|
|
|
|
if key in attrs:
|
|
|
|
cookie_har[key] = attrs[key]
|
|
|
|
|
|
|
|
# These keys need to be boolean!
|
|
|
|
for key in ["httpOnly", "secure"]:
|
|
|
|
cookie_har[key] = bool(key in attrs)
|
|
|
|
|
|
|
|
# Expiration time needs to be formatted
|
|
|
|
expire_ts = cookies.get_expiration_ts(attrs)
|
2016-08-11 10:22:46 +00:00
|
|
|
if expire_ts is not None:
|
2017-02-25 17:37:47 +00:00
|
|
|
cookie_har["expires"] = datetime.fromtimestamp(expire_ts, timezone.utc).isoformat()
|
2016-08-04 09:08:14 +00:00
|
|
|
|
2016-08-11 10:22:46 +00:00
|
|
|
rv.append(cookie_har)
|
2016-08-04 09:08:14 +00:00
|
|
|
|
2016-08-11 10:22:46 +00:00
|
|
|
return rv
|
2016-08-04 09:08:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
def format_request_cookies(fields):
|
|
|
|
return format_cookies(cookies.group_cookies(fields))
|
|
|
|
|
|
|
|
|
|
|
|
def format_response_cookies(fields):
|
2017-04-26 12:15:33 +00:00
|
|
|
return format_cookies((c[0], c[1][0], c[1][1]) for c in fields)
|
2016-08-04 09:08:14 +00:00
|
|
|
|
|
|
|
|
2016-08-02 12:20:14 +00:00
|
|
|
def name_value(obj):
|
|
|
|
"""
|
|
|
|
Convert (key, value) pairs to HAR format.
|
|
|
|
"""
|
2016-10-23 01:47:12 +00:00
|
|
|
return [{"name": k, "value": v} for k, v in obj.items()]
|