diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/har_extractor.py b/examples/har_extractor.py index e7718fe80..25661f7c7 100644 --- a/examples/har_extractor.py +++ b/examples/har_extractor.py @@ -1,5 +1,4 @@ """ - This inline script utilizes harparser.HAR from https://github.com/JustusW/harparser to generate a HAR log object. """ @@ -17,7 +16,7 @@ class _HARLog(HAR.log): __page_count__ = 0 __page_ref__ = {} - def __init__(self, page_list): + def __init__(self, page_list=[]): self.__page_list__ = page_list self.__page_count__ = 0 self.__page_ref__ = {} @@ -67,7 +66,7 @@ def start(context, argv): '(- will output to stdout, filenames ending with .zhar ' 'will result in compressed har)' ) - context.HARLog = _HARLog(['https://github.com']) + context.HARLog = _HARLog() context.seen_server = set() @@ -83,17 +82,17 @@ def response(context, flow): # Calculate the connect_time for this server_conn. Afterwards add it to # seen list, in order to avoid the connect_time being present in entries # that use an existing connection. - connect_time = flow.server_conn.timestamp_tcp_setup - \ - flow.server_conn.timestamp_start + connect_time = (flow.server_conn.timestamp_tcp_setup - + flow.server_conn.timestamp_start) context.seen_server.add(flow.server_conn) if flow.server_conn.timestamp_ssl_setup is not None: # Get the ssl_time for this server_conn as the difference between # the start of the successful tcp setup and the successful ssl - # setup. If no ssl setup has been made it is left as -1 since it + # setup. If no ssl setup has been made it is left as -1 since it # doesn't apply to this connection. - ssl_time = flow.server_conn.timestamp_ssl_setup - \ - flow.server_conn.timestamp_tcp_setup + ssl_time = (flow.server_conn.timestamp_ssl_setup - + flow.server_conn.timestamp_tcp_setup) # Calculate the raw timings from the different timestamps present in the # request and response object. 
For lack of a way to measure it dns timings @@ -112,80 +111,58 @@ def response(context, flow): # HAR timings are integers in ms, so we have to re-encode the raw timings to # that format. - timings = dict([(key, int(1000 * value)) - for key, value in timings_raw.iteritems()]) + timings = dict([(k, int(1000 * v)) for k, v in timings_raw.iteritems()]) - # The full_time is the sum of all timings. Timings set to -1 will be ignored - # as per spec. - full_time = 0 - for item in timings.values(): - if item > -1: - full_time += item + # The full_time is the sum of all timings. + # Timings set to -1 will be ignored as per spec. + full_time = sum(v for v in timings.values() if v > -1) - started_date_time = datetime.fromtimestamp( - flow.request.timestamp_start, - tz=utc).isoformat() + started_date_time = datetime.utcfromtimestamp( + flow.request.timestamp_start).isoformat() request_query_string = [{"name": k, "value": v} - for k, v in flow.request.query] - request_http_version = flow.request.http_version - # Cookies are shaped as tuples by MITMProxy. - request_cookies = [{"name": k.strip(), "value": v[0]} - for k, v in flow.request.cookies.items()] - request_headers = [{"name": k, "value": v} for k, v in flow.request.headers] - request_headers_size = len(str(flow.request.headers)) - request_body_size = len(flow.request.content) + for k, v in flow.request.query or {}] - response_http_version = flow.response.http_version - # Cookies are shaped as tuples by MITMProxy. 
- response_cookies = [{"name": k.strip(), "value": v[0]} - for k, v in flow.response.cookies.items()] - response_headers = [{"name": k, "value": v} - for k, v in flow.response.headers] - response_headers_size = len(str(flow.response.headers)) response_body_size = len(flow.response.content) response_body_decoded_size = len(flow.response.get_decoded_content()) response_body_compression = response_body_decoded_size - response_body_size - response_mime_type = flow.response.headers.get('Content-Type', '') - response_redirect_url = flow.response.headers.get('Location', '') - entry = HAR.entries( - { - "startedDateTime": started_date_time, - "time": full_time, - "request": { - "method": flow.request.method, - "url": flow.request.url, - "httpVersion": request_http_version, - "cookies": request_cookies, - "headers": request_headers, - "queryString": request_query_string, - "headersSize": request_headers_size, - "bodySize": request_body_size, + entry = HAR.entries({ + "startedDateTime": started_date_time, + "time": full_time, + "request": { + "method": flow.request.method, + "url": flow.request.url, + "httpVersion": flow.request.http_version, + "cookies": format_cookies(flow.request.cookies), + "headers": format_headers(flow.request.headers), + "queryString": request_query_string, + "headersSize": len(str(flow.request.headers)), + "bodySize": len(flow.request.content), + }, + "response": { + "status": flow.response.status_code, + "statusText": flow.response.msg, + "httpVersion": flow.response.http_version, + "cookies": format_cookies(flow.response.cookies), + "headers": format_headers(flow.response.headers), + "content": { + "size": response_body_size, + "compression": response_body_compression, + "mimeType": flow.response.headers.get('Content-Type', '') }, - "response": { - "status": flow.response.status_code, - "statusText": flow.response.msg, - "httpVersion": response_http_version, - "cookies": response_cookies, - "headers": response_headers, - "content": { - "size": 
response_body_size, - "compression": response_body_compression, - "mimeType": response_mime_type}, - "redirectURL": response_redirect_url, - "headersSize": response_headers_size, - "bodySize": response_body_size, - }, - "cache": {}, - "timings": timings, - }) + "redirectURL": flow.response.headers.get('Location', ''), + "headersSize": len(str(flow.response.headers)), + "bodySize": response_body_size, + }, + "cache": {}, + "timings": timings, + }) - # If the current url is in the page list of context.HARLog or does not have - # a referrer we add it as a new pages object. - if flow.request.url in context.HARLog.get_page_list() or flow.request.headers.get( - 'Referer', - None) is None: + # If the current url is in the page list of context.HARLog or + # does not have a referrer, we add it as a new pages object. + if (flow.request.url in context.HARLog.get_page_list() or + flow.request.headers.get('Referer') is None): page_id = context.HARLog.create_page_id() context.HARLog.add( HAR.pages({ @@ -215,7 +192,7 @@ def done(context): """ Called once on script shutdown, after any other events. 
""" - from pprint import pprint + import pprint import json json_dump = context.HARLog.json() @@ -239,6 +216,18 @@ def done(context): ) +def format_cookies(obj): + if obj: + return [{"name": k.strip(), "value": v[0]} for k, v in obj.items()] + return "" + + +def format_headers(obj): + if obj: + return [{"name": k, "value": v} for k, v in obj.fields] + return "" + + def print_attributes(obj, filter_string=None, hide_privates=False): """ Useful helper method to quickly get all attributes of an object and its diff --git a/test/mitmproxy/data/har_extractor.har b/test/mitmproxy/data/har_extractor.har new file mode 100644 index 000000000..2f5099b30 --- /dev/null +++ b/test/mitmproxy/data/har_extractor.har @@ -0,0 +1,78 @@ +{ + "test_response": { + "log": { + "__page_count__": 1, + "version": "1.2", + "creator": { + "comment": "", + "version": "0.1", + "name": "MITMPROXY HARExtractor" + }, + "pages": [ + { + "startedDateTime": "1993-08-24T14:41:12", + "id": "autopage_1", + "title": "http://address:22/path" + } + ], + "entries": [ + { + "pageref": "autopage_1", + "startedDateTime": "1993-08-24T14:41:12", + "cache": {}, + "request": { + "cookies": [], + "url": "http://address:22/path", + "queryString": [], + "headers": [ + { + "name": "header", + "value": "qvalue" + }, + { + "name": "content-length", + "value": "7" + } + ], + "headersSize": 35, + "httpVersion": "HTTP/1.1", + "method": "GET", + "bodySize": 7 + }, + "timings": { + "receive": 0, + "ssl": 1000, + "connect": 1000, + "send": 0, + "wait": 0 + }, + "time": 2000, + "response": { + "status": 200, + "cookies": [], + "statusText": "OK", + "content": { + "mimeType": "", + "compression": 0, + "size": 7 + }, + "headers": [ + { + "name": "content-length", + "value": "7" + }, + { + "name": "header-response", + "value": "svalue" + } + ], + "headersSize": 44, + "redirectURL": "", + "httpVersion": "HTTP/1.1", + "bodySize": 7 + } + } + ] + } + } +} \ No newline at end of file diff --git a/test/mitmproxy/test_har_extractor.py 
import json
import netlib.tutils
from . import tutils

from examples import har_extractor


class Context(object):
    """Bare attribute holder handed to the script hooks as their context."""


# Request and response fixtures use one fixed instant for both start and end,
# so the values computed from them do not vary between runs.
_FIXED_TIMESTAMP = 746203272

trequest = netlib.tutils.treq(
    timestamp_start=_FIXED_TIMESTAMP,
    timestamp_end=_FIXED_TIMESTAMP,
)

tresponse = netlib.tutils.tresp(
    timestamp_start=_FIXED_TIMESTAMP,
    timestamp_end=_FIXED_TIMESTAMP,
)


def test_start():
    """Calling start() with an empty argv must raise ValueError."""
    context = Context()
    tutils.raises(ValueError, har_extractor.start, context, [])


def test_response():
    """Feed one flow through response() and compare the log to the fixture."""
    context = Context()
    context.seen_server = set()
    context.HARLog = har_extractor._HARLog([])

    flow = tutils.tflow(req=trequest, resp=tresponse)
    har_extractor.response(context, flow)

    fixture_path = tutils.test_data.path("data/har_extractor.har")
    with open(fixture_path) as fixture:
        expected = json.load(fixture)["test_response"]

    assert json.loads(context.HARLog.json()) == expected