mirror of
https://github.com/Grasscutters/mitmproxy.git
synced 2024-11-26 18:18:25 +00:00
Clean up har_extractor example
- Fix spacing, line length, unused imports, unusual import idioms
- Prevent it from barfing into our test output
This commit is contained in:
parent
1b5f5021dc
commit
bb5fb2dbe0
@ -1,22 +1,18 @@
|
|||||||
"""
|
"""
|
||||||
This inline script utilizes harparser.HAR from https://github.com/JustusW/harparser
|
|
||||||
to generate a HAR log object.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from harparser import HAR
|
|
||||||
from pytz import UTC
|
|
||||||
except ImportError as e:
|
|
||||||
import sys
|
|
||||||
print >> sys.stderr, "\r\nMissing dependencies: please run `pip install mitmproxy[examples]`.\r\n"
|
|
||||||
raise
|
|
||||||
|
|
||||||
from datetime import datetime, timedelta, tzinfo
|
This inline script utilizes harparser.HAR from
|
||||||
|
https://github.com/JustusW/harparser to generate a HAR log object.
|
||||||
|
"""
|
||||||
|
from harparser import HAR
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
class _HARLog(HAR.log):
|
class _HARLog(HAR.log):
|
||||||
# The attributes need to be registered here for them to actually be available later via self. This is
|
# The attributes need to be registered here for them to actually be
|
||||||
# due to HAREncodable linking __getattr__ to __getitem__. Anything that is set only in __init__ will
|
# available later via self. This is due to HAREncodable linking __getattr__
|
||||||
# just be added as key/value pair to self.__classes__.
|
# to __getitem__. Anything that is set only in __init__ will just be added
|
||||||
|
# as key/value pair to self.__classes__.
|
||||||
__page_list__ = []
|
__page_list__ = []
|
||||||
__page_count__ = 0
|
__page_count__ = 0
|
||||||
__page_ref__ = {}
|
__page_ref__ = {}
|
||||||
@ -58,55 +54,66 @@ class _HARLog(HAR.log):
|
|||||||
|
|
||||||
def start(context, argv):
|
def start(context, argv):
|
||||||
"""
|
"""
|
||||||
On start we create a HARLog instance. You will have to adapt this to suit your actual needs
|
On start we create a HARLog instance. You will have to adapt this to
|
||||||
of HAR generation. As it will probably be necessary to cluster logs by IPs or reset them
|
suit your actual needs of HAR generation, as it will probably be
|
||||||
from time to time.
|
necessary to cluster logs by IPs or reset them from time to time.
|
||||||
"""
|
"""
|
||||||
context.dump_file = None
|
context.dump_file = None
|
||||||
if len(argv) > 1:
|
if len(argv) > 1:
|
||||||
context.dump_file = argv[1]
|
context.dump_file = argv[1]
|
||||||
else:
|
else:
|
||||||
raise ValueError('Usage: -s "har_extractor.py filename" '
|
raise ValueError(
|
||||||
'(- will output to stdout, filenames ending with .zhar will result in compressed har)')
|
'Usage: -s "har_extractor.py filename" '
|
||||||
|
'(- will output to stdout, filenames ending with .zhar '
|
||||||
|
'will result in compressed har)'
|
||||||
|
)
|
||||||
context.HARLog = _HARLog(['https://github.com'])
|
context.HARLog = _HARLog(['https://github.com'])
|
||||||
context.seen_server = set()
|
context.seen_server = set()
|
||||||
|
|
||||||
|
|
||||||
def response(context, flow):
|
def response(context, flow):
|
||||||
"""
|
"""
|
||||||
Called when a server response has been received. At the time of this message both
|
Called when a server response has been received. At the time of this
|
||||||
a request and a response are present and completely done.
|
message both a request and a response are present and completely done.
|
||||||
"""
|
"""
|
||||||
# Values are converted from float seconds to int milliseconds later.
|
# Values are converted from float seconds to int milliseconds later.
|
||||||
ssl_time = -.001
|
ssl_time = -.001
|
||||||
connect_time = -.001
|
connect_time = -.001
|
||||||
if flow.server_conn not in context.seen_server:
|
if flow.server_conn not in context.seen_server:
|
||||||
# Calculate the connect_time for this server_conn. Afterwards add it to seen list, in
|
# Calculate the connect_time for this server_conn. Afterwards add it to
|
||||||
# order to avoid the connect_time being present in entries that use an existing connection.
|
# seen list, in order to avoid the connect_time being present in entries
|
||||||
|
# that use an existing connection.
|
||||||
connect_time = flow.server_conn.timestamp_tcp_setup - flow.server_conn.timestamp_start
|
connect_time = flow.server_conn.timestamp_tcp_setup - flow.server_conn.timestamp_start
|
||||||
context.seen_server.add(flow.server_conn)
|
context.seen_server.add(flow.server_conn)
|
||||||
|
|
||||||
if flow.server_conn.timestamp_ssl_setup is not None:
|
if flow.server_conn.timestamp_ssl_setup is not None:
|
||||||
# Get the ssl_time for this server_conn as the difference between the start of the successful
|
# Get the ssl_time for this server_conn as the difference between
|
||||||
# tcp setup and the successful ssl setup. If no ssl setup has been made it is left as -1 since
|
# the start of the successful tcp setup and the successful ssl
|
||||||
# it doesn't apply to this connection.
|
# setup. If no ssl setup has been made it is left as -1 since it
|
||||||
|
# doesn't apply to this connection.
|
||||||
ssl_time = flow.server_conn.timestamp_ssl_setup - flow.server_conn.timestamp_tcp_setup
|
ssl_time = flow.server_conn.timestamp_ssl_setup - flow.server_conn.timestamp_tcp_setup
|
||||||
|
|
||||||
# Calculate the raw timings from the different timestamps present in the request and response object.
|
# Calculate the raw timings from the different timestamps present in the
|
||||||
# For lack of a way to measure it dns timings can not be calculated. The same goes for HAR blocked:
|
# request and response object. For lack of a way to measure it dns timings
|
||||||
# MITMProxy will open a server connection as soon as it receives the host and port from the client
|
# can not be calculated. The same goes for HAR blocked: MITMProxy will open
|
||||||
# connection. So the time spent waiting is actually spent waiting between request.timestamp_end and
|
# a server connection as soon as it receives the host and port from the
|
||||||
# response.timestamp_start thus it correlates to HAR wait instead.
|
# client connection. So the time spent waiting is actually spent waiting
|
||||||
timings_raw = {'send': flow.request.timestamp_end - flow.request.timestamp_start,
|
# between request.timestamp_end and response.timestamp_start thus it
|
||||||
'wait': flow.response.timestamp_start - flow.request.timestamp_end,
|
# correlates to HAR wait instead.
|
||||||
'receive': flow.response.timestamp_end - flow.response.timestamp_start,
|
timings_raw = {
|
||||||
'connect': connect_time,
|
'send': flow.request.timestamp_end - flow.request.timestamp_start,
|
||||||
'ssl': ssl_time}
|
'wait': flow.response.timestamp_start - flow.request.timestamp_end,
|
||||||
|
'receive': flow.response.timestamp_end - flow.response.timestamp_start,
|
||||||
|
'connect': connect_time,
|
||||||
|
'ssl': ssl_time
|
||||||
|
}
|
||||||
|
|
||||||
# HAR timings are integers in ms, so we have to re-encode the raw timings to that format.
|
# HAR timings are integers in ms, so we have to re-encode the raw timings to
|
||||||
|
# that format.
|
||||||
timings = dict([(key, int(1000 * value)) for key, value in timings_raw.iteritems()])
|
timings = dict([(key, int(1000 * value)) for key, value in timings_raw.iteritems()])
|
||||||
|
|
||||||
# The full_time is the sum of all timings. Timings set to -1 will be ignored as per spec.
|
# The full_time is the sum of all timings. Timings set to -1 will be ignored
|
||||||
|
# as per spec.
|
||||||
full_time = 0
|
full_time = 0
|
||||||
for item in timings.values():
|
for item in timings.values():
|
||||||
if item > -1:
|
if item > -1:
|
||||||
@ -157,21 +164,30 @@ def response(context, flow):
|
|||||||
"cache": {},
|
"cache": {},
|
||||||
"timings": timings, })
|
"timings": timings, })
|
||||||
|
|
||||||
# If the current url is in the page list of context.HARLog or does not have a referrer we add it as a new
|
# If the current url is in the page list of context.HARLog or does not have
|
||||||
# pages object.
|
# a referrer we add it as a new pages object.
|
||||||
if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get('Referer', None) is None:
|
if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get('Referer', None) is None:
|
||||||
page_id = context.HARLog.create_page_id()
|
page_id = context.HARLog.create_page_id()
|
||||||
context.HARLog.add(HAR.pages({"startedDateTime": entry['startedDateTime'],
|
context.HARLog.add(
|
||||||
"id": page_id,
|
HAR.pages({
|
||||||
"title": flow.request.url, }))
|
"startedDateTime": entry['startedDateTime'],
|
||||||
|
"id": page_id,
|
||||||
|
"title": flow.request.url,
|
||||||
|
})
|
||||||
|
)
|
||||||
context.HARLog.set_page_ref(flow.request.url, page_id)
|
context.HARLog.set_page_ref(flow.request.url, page_id)
|
||||||
entry['pageref'] = page_id
|
entry['pageref'] = page_id
|
||||||
|
|
||||||
# Lookup the referer in the page_ref of context.HARLog to point this entries pageref attribute to the right
|
# Lookup the referer in the page_ref of context.HARLog to point this entry's
|
||||||
# pages object, then set it as a new reference to build a reference tree.
|
# pageref attribute to the right pages object, then set it as a new
|
||||||
|
# reference to build a reference tree.
|
||||||
elif context.HARLog.get_page_ref(flow.request.headers.get('Referer', (None, ))[0]) is not None:
|
elif context.HARLog.get_page_ref(flow.request.headers.get('Referer', (None, ))[0]) is not None:
|
||||||
entry['pageref'] = context.HARLog.get_page_ref(flow.request.headers['Referer'][0])
|
entry['pageref'] = context.HARLog.get_page_ref(
|
||||||
context.HARLog.set_page_ref(flow.request.headers['Referer'][0], entry['pageref'])
|
flow.request.headers['Referer'][0]
|
||||||
|
)
|
||||||
|
context.HARLog.set_page_ref(
|
||||||
|
flow.request.headers['Referer'][0], entry['pageref']
|
||||||
|
)
|
||||||
|
|
||||||
context.HARLog.add(entry)
|
context.HARLog.add(entry)
|
||||||
|
|
||||||
@ -186,22 +202,28 @@ def done(context):
|
|||||||
json_dump = context.HARLog.json()
|
json_dump = context.HARLog.json()
|
||||||
compressed_json_dump = context.HARLog.compress()
|
compressed_json_dump = context.HARLog.compress()
|
||||||
|
|
||||||
print "=" * 100
|
|
||||||
if context.dump_file == '-':
|
if context.dump_file == '-':
|
||||||
pprint(json.loads(json_dump))
|
context.log(pprint.pformat(json.loads(json_dump)))
|
||||||
elif context.dump_file.endswith('.zhar'):
|
elif context.dump_file.endswith('.zhar'):
|
||||||
file(context.dump_file, "w").write(compressed_json_dump)
|
file(context.dump_file, "w").write(compressed_json_dump)
|
||||||
else:
|
else:
|
||||||
file(context.dump_file, "w").write(json_dump)
|
file(context.dump_file, "w").write(json_dump)
|
||||||
print "=" * 100
|
context.log(
|
||||||
print "HAR log finished with %s bytes (%s bytes compressed)" % (len(json_dump), len(compressed_json_dump))
|
"HAR log finished with %s bytes (%s bytes compressed)" % (
|
||||||
print "Compression rate is %s%%" % str(100. * len(compressed_json_dump) / len(json_dump))
|
len(json_dump), len(compressed_json_dump)
|
||||||
print "=" * 100
|
)
|
||||||
|
)
|
||||||
|
context.log(
|
||||||
|
"Compression rate is %s%%" % str(
|
||||||
|
100. * len(compressed_json_dump) / len(json_dump)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def print_attributes(obj, filter_string=None, hide_privates=False):
|
def print_attributes(obj, filter_string=None, hide_privates=False):
|
||||||
"""
|
"""
|
||||||
Useful helper method to quickly get all attributes of an object and its values.
|
Useful helper method to quickly get all attributes of an object and its
|
||||||
|
values.
|
||||||
"""
|
"""
|
||||||
for attr in dir(obj):
|
for attr in dir(obj):
|
||||||
if hide_privates and "__" in attr:
|
if hide_privates and "__" in attr:
|
||||||
|
Loading…
Reference in New Issue
Block a user