mirror of
https://github.com/Grasscutters/mitmproxy.git
synced 2024-11-23 08:11:00 +00:00
Merge pull request #989 from dufferzafar/har-extractor
Improve HAR Extractor Script
This commit is contained in:
commit
428da2c4b1
0
examples/__init__.py
Normal file
0
examples/__init__.py
Normal file
@ -1,5 +1,4 @@
|
||||
"""
|
||||
|
||||
This inline script utilizes harparser.HAR from
|
||||
https://github.com/JustusW/harparser to generate a HAR log object.
|
||||
"""
|
||||
@ -17,7 +16,7 @@ class _HARLog(HAR.log):
|
||||
__page_count__ = 0
|
||||
__page_ref__ = {}
|
||||
|
||||
def __init__(self, page_list):
|
||||
def __init__(self, page_list=[]):
|
||||
self.__page_list__ = page_list
|
||||
self.__page_count__ = 0
|
||||
self.__page_ref__ = {}
|
||||
@ -67,7 +66,7 @@ def start(context, argv):
|
||||
'(- will output to stdout, filenames ending with .zhar '
|
||||
'will result in compressed har)'
|
||||
)
|
||||
context.HARLog = _HARLog(['https://github.com'])
|
||||
context.HARLog = _HARLog()
|
||||
context.seen_server = set()
|
||||
|
||||
|
||||
@ -83,8 +82,8 @@ def response(context, flow):
|
||||
# Calculate the connect_time for this server_conn. Afterwards add it to
|
||||
# seen list, in order to avoid the connect_time being present in entries
|
||||
# that use an existing connection.
|
||||
connect_time = flow.server_conn.timestamp_tcp_setup - \
|
||||
flow.server_conn.timestamp_start
|
||||
connect_time = (flow.server_conn.timestamp_tcp_setup -
|
||||
flow.server_conn.timestamp_start)
|
||||
context.seen_server.add(flow.server_conn)
|
||||
|
||||
if flow.server_conn.timestamp_ssl_setup is not None:
|
||||
@ -92,8 +91,8 @@ def response(context, flow):
|
||||
# the start of the successful tcp setup and the successful ssl
|
||||
# setup. If no ssl setup has been made it is left as -1 since it
|
||||
# doesn't apply to this connection.
|
||||
ssl_time = flow.server_conn.timestamp_ssl_setup - \
|
||||
flow.server_conn.timestamp_tcp_setup
|
||||
ssl_time = (flow.server_conn.timestamp_ssl_setup -
|
||||
flow.server_conn.timestamp_tcp_setup)
|
||||
|
||||
# Calculate the raw timings from the different timestamps present in the
|
||||
# request and response object. For lack of a way to measure it dns timings
|
||||
@ -112,80 +111,58 @@ def response(context, flow):
|
||||
|
||||
# HAR timings are integers in ms, so we have to re-encode the raw timings to
|
||||
# that format.
|
||||
timings = dict([(key, int(1000 * value))
|
||||
for key, value in timings_raw.iteritems()])
|
||||
timings = dict([(k, int(1000 * v)) for k, v in timings_raw.iteritems()])
|
||||
|
||||
# The full_time is the sum of all timings. Timings set to -1 will be ignored
|
||||
# as per spec.
|
||||
full_time = 0
|
||||
for item in timings.values():
|
||||
if item > -1:
|
||||
full_time += item
|
||||
# The full_time is the sum of all timings.
|
||||
# Timings set to -1 will be ignored as per spec.
|
||||
full_time = sum(v for v in timings.values() if v > -1)
|
||||
|
||||
started_date_time = datetime.fromtimestamp(
|
||||
flow.request.timestamp_start,
|
||||
tz=utc).isoformat()
|
||||
started_date_time = datetime.utcfromtimestamp(
|
||||
flow.request.timestamp_start).isoformat()
|
||||
|
||||
request_query_string = [{"name": k, "value": v}
|
||||
for k, v in flow.request.query]
|
||||
request_http_version = flow.request.http_version
|
||||
# Cookies are shaped as tuples by MITMProxy.
|
||||
request_cookies = [{"name": k.strip(), "value": v[0]}
|
||||
for k, v in flow.request.cookies.items()]
|
||||
request_headers = [{"name": k, "value": v} for k, v in flow.request.headers]
|
||||
request_headers_size = len(str(flow.request.headers))
|
||||
request_body_size = len(flow.request.content)
|
||||
for k, v in flow.request.query or {}]
|
||||
|
||||
response_http_version = flow.response.http_version
|
||||
# Cookies are shaped as tuples by MITMProxy.
|
||||
response_cookies = [{"name": k.strip(), "value": v[0]}
|
||||
for k, v in flow.response.cookies.items()]
|
||||
response_headers = [{"name": k, "value": v}
|
||||
for k, v in flow.response.headers]
|
||||
response_headers_size = len(str(flow.response.headers))
|
||||
response_body_size = len(flow.response.content)
|
||||
response_body_decoded_size = len(flow.response.get_decoded_content())
|
||||
response_body_compression = response_body_decoded_size - response_body_size
|
||||
response_mime_type = flow.response.headers.get('Content-Type', '')
|
||||
response_redirect_url = flow.response.headers.get('Location', '')
|
||||
|
||||
entry = HAR.entries(
|
||||
{
|
||||
entry = HAR.entries({
|
||||
"startedDateTime": started_date_time,
|
||||
"time": full_time,
|
||||
"request": {
|
||||
"method": flow.request.method,
|
||||
"url": flow.request.url,
|
||||
"httpVersion": request_http_version,
|
||||
"cookies": request_cookies,
|
||||
"headers": request_headers,
|
||||
"httpVersion": flow.request.http_version,
|
||||
"cookies": format_cookies(flow.request.cookies),
|
||||
"headers": format_headers(flow.request.headers),
|
||||
"queryString": request_query_string,
|
||||
"headersSize": request_headers_size,
|
||||
"bodySize": request_body_size,
|
||||
"headersSize": len(str(flow.request.headers)),
|
||||
"bodySize": len(flow.request.content),
|
||||
},
|
||||
"response": {
|
||||
"status": flow.response.status_code,
|
||||
"statusText": flow.response.msg,
|
||||
"httpVersion": response_http_version,
|
||||
"cookies": response_cookies,
|
||||
"headers": response_headers,
|
||||
"httpVersion": flow.response.http_version,
|
||||
"cookies": format_cookies(flow.response.cookies),
|
||||
"headers": format_headers(flow.response.headers),
|
||||
"content": {
|
||||
"size": response_body_size,
|
||||
"compression": response_body_compression,
|
||||
"mimeType": response_mime_type},
|
||||
"redirectURL": response_redirect_url,
|
||||
"headersSize": response_headers_size,
|
||||
"mimeType": flow.response.headers.get('Content-Type', '')
|
||||
},
|
||||
"redirectURL": flow.response.headers.get('Location', ''),
|
||||
"headersSize": len(str(flow.response.headers)),
|
||||
"bodySize": response_body_size,
|
||||
},
|
||||
"cache": {},
|
||||
"timings": timings,
|
||||
})
|
||||
|
||||
# If the current url is in the page list of context.HARLog or does not have
|
||||
# a referrer we add it as a new pages object.
|
||||
if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get(
|
||||
'Referer',
|
||||
None) is None:
|
||||
# If the current url is in the page list of context.HARLog or
|
||||
# does not have a referrer, we add it as a new pages object.
|
||||
if (flow.request.url in context.HARLog.get_page_list() or
|
||||
flow.request.headers.get('Referer') is None):
|
||||
page_id = context.HARLog.create_page_id()
|
||||
context.HARLog.add(
|
||||
HAR.pages({
|
||||
@ -215,7 +192,7 @@ def done(context):
|
||||
"""
|
||||
Called once on script shutdown, after any other events.
|
||||
"""
|
||||
from pprint import pprint
|
||||
import pprint
|
||||
import json
|
||||
|
||||
json_dump = context.HARLog.json()
|
||||
@ -239,6 +216,18 @@ def done(context):
|
||||
)
|
||||
|
||||
|
||||
def format_cookies(obj):
    """
        Convert a cookie mapping into a list of HAR cookie records.

        Every (key, value) pair yielded by obj.items() becomes
        {"name": <key with surrounding whitespace stripped>, "value": value[0]}.

        An empty or None obj yields an empty list, so the result is always a
        list (the HAR "cookies" field is an array), never a string.
    """
    if not obj:
        return []
    # Cookie values arrive as tuples; only the first element is the value.
    return [{"name": k.strip(), "value": v[0]} for k, v in obj.items()]
|
||||
|
||||
|
||||
def format_headers(obj):
    """
        Convert a headers object into a list of HAR header records.

        Every (name, value) pair found in obj.fields becomes
        {"name": name, "value": value}, in the order obj.fields provides them.

        A falsy or None obj yields an empty list, so the result is always a
        list (the HAR "headers" field is an array), never a string.
    """
    if not obj:
        return []
    return [{"name": k, "value": v} for k, v in obj.fields]
|
||||
|
||||
|
||||
def print_attributes(obj, filter_string=None, hide_privates=False):
|
||||
"""
|
||||
Useful helper method to quickly get all attributes of an object and its
|
||||
|
78
test/mitmproxy/data/har_extractor.har
Normal file
78
test/mitmproxy/data/har_extractor.har
Normal file
@ -0,0 +1,78 @@
|
||||
{
|
||||
"test_response": {
|
||||
"log": {
|
||||
"__page_count__": 1,
|
||||
"version": "1.2",
|
||||
"creator": {
|
||||
"comment": "",
|
||||
"version": "0.1",
|
||||
"name": "MITMPROXY HARExtractor"
|
||||
},
|
||||
"pages": [
|
||||
{
|
||||
"startedDateTime": "1993-08-24T14:41:12",
|
||||
"id": "autopage_1",
|
||||
"title": "http://address:22/path"
|
||||
}
|
||||
],
|
||||
"entries": [
|
||||
{
|
||||
"pageref": "autopage_1",
|
||||
"startedDateTime": "1993-08-24T14:41:12",
|
||||
"cache": {},
|
||||
"request": {
|
||||
"cookies": [],
|
||||
"url": "http://address:22/path",
|
||||
"queryString": [],
|
||||
"headers": [
|
||||
{
|
||||
"name": "header",
|
||||
"value": "qvalue"
|
||||
},
|
||||
{
|
||||
"name": "content-length",
|
||||
"value": "7"
|
||||
}
|
||||
],
|
||||
"headersSize": 35,
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"method": "GET",
|
||||
"bodySize": 7
|
||||
},
|
||||
"timings": {
|
||||
"receive": 0,
|
||||
"ssl": 1000,
|
||||
"connect": 1000,
|
||||
"send": 0,
|
||||
"wait": 0
|
||||
},
|
||||
"time": 2000,
|
||||
"response": {
|
||||
"status": 200,
|
||||
"cookies": [],
|
||||
"statusText": "OK",
|
||||
"content": {
|
||||
"mimeType": "",
|
||||
"compression": 0,
|
||||
"size": 7
|
||||
},
|
||||
"headers": [
|
||||
{
|
||||
"name": "content-length",
|
||||
"value": "7"
|
||||
},
|
||||
{
|
||||
"name": "header-response",
|
||||
"value": "svalue"
|
||||
}
|
||||
],
|
||||
"headersSize": 44,
|
||||
"redirectURL": "",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"bodySize": 7
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
37
test/mitmproxy/test_har_extractor.py
Normal file
37
test/mitmproxy/test_har_extractor.py
Normal file
@ -0,0 +1,37 @@
|
||||
import json
|
||||
import netlib.tutils
|
||||
from . import tutils
|
||||
|
||||
from examples import har_extractor
|
||||
|
||||
|
||||
class Context(object):
    """Bare attribute holder used in place of the script context in tests."""
|
||||
|
||||
|
||||
# Fixed epoch timestamp (1993-08-24T14:41:12 UTC) so the generated
# startedDateTime matches the value stored in data/har_extractor.har.
trequest = netlib.tutils.treq(timestamp_start=746203272, timestamp_end=746203272)

tresponse = netlib.tutils.tresp(timestamp_start=746203272, timestamp_end=746203272)
|
||||
|
||||
|
||||
def test_start():
    """start() called with an empty argv must raise ValueError."""
    context = Context()
    empty_argv = []
    tutils.raises(ValueError, har_extractor.start, context, empty_argv)
|
||||
|
||||
|
||||
def test_response():
    """Run one canned flow through response() and compare with the fixture."""
    # Build the minimal context state that response() works with.
    context = Context()
    context.HARLog = har_extractor._HARLog([])
    context.seen_server = set()

    flow = tutils.tflow(req=trequest, resp=tresponse)
    har_extractor.response(context, flow)

    fixture_path = tutils.test_data.path("data/har_extractor.har")
    with open(fixture_path) as handle:
        expected = json.load(handle)

    produced = json.loads(context.HARLog.json())
    assert produced == expected["test_response"]
|
Loading…
Reference in New Issue
Block a user