Merge pull request #5099 from EndUser509/save_streamed_data2

Save streamed data
This commit is contained in:
EndUser509 2022-03-15 21:34:49 +01:00 committed by GitHub
parent 6ec97d0b8e
commit a0cf273484
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 121 additions and 0 deletions

View File

@ -24,6 +24,7 @@
* Fix random connection stalls (#5040, @EndUser509)
* Add `n` new flow keybind to mitmweb (#5061, @ianklatzco)
* Fix compatibility with BoringSSL (@pmoulton)
* Add example addon for saving streamed data to individual files (@EndUser509)
* Change connection event hooks to be blocking.
Processing will only resume once the event hook has finished. (@Prinzhorn)
* Allow addon hooks to be async (@nneonneo, #4207)

View File

@ -0,0 +1,120 @@
"""
Save streamed requests and responses
If the option 'save_streamed_data' is set to a format string then
streamed requests and responses are written to individual files with a name
derived from the string. Apart from python strftime() formating (using the
request start time) the following codes can also be used:
- %+T: The time stamp of the request with microseconds
- %+D: 'req' or 'rsp' indicating the direction of the data
- %+I: The client connection ID
- %+C: The client IP address
A good starting point for a template could be '~/streamed_files/%+D:%+T:%+I',
a more complex example is '~/streamed_files/%+C/%Y-%m-%d%/%+D:%+T:%+I'.
The client connection ID combined with the request time stamp should be unique
for associating a file with its corresponding flow in the stream saved with
'--save-stream-file'.
This addon is not compatible with addons that use the same mechanism to
capture streamed data, http-stream-modify.py for instance.
"""
from mitmproxy import ctx
from datetime import datetime
from pathlib import Path
import os
import typing
class StreamSaver:
TAG = "save_streamed_data: "
def __init__(self, flow, direction):
self.flow = flow
self.direction = direction
self.fh = None
self.path = None
def done(self):
if self.fh:
self.fh.close()
self.fh = None
# Make sure we have no circular references
self.flow = None
def __call__(self, data):
# End of stream?
if len(data) == 0:
self.done()
return data
# Just in case the option changes while a stream is in flight
if not ctx.options.save_streamed_data:
return data
# This is a safeguard but should not be needed
if not self.flow or not self.flow.request:
return data
if not self.fh:
self.path = datetime.fromtimestamp(self.flow.request.timestamp_start).strftime(ctx.options.save_streamed_data)
self.path = self.path.replace('%+T', str(self.flow.request.timestamp_start))
self.path = self.path.replace('%+I', str(self.flow.client_conn.id))
self.path = self.path.replace('%+D', self.direction)
self.path = self.path.replace('%+C', self.flow.client_conn.address[0])
self.path = os.path.expanduser(self.path)
parent = Path(self.path).parent
try:
if not parent.exists():
parent.mkdir(parents=True, exist_ok=True)
except OSError:
ctx.log.error(f"{self.TAG}Failed to create directory: {parent}")
try:
self.fh = open(self.path, "wb", buffering=0)
except OSError:
ctx.log.error(f"{self.TAG}Failed to open for writing: {self.path}")
if self.fh:
try:
self.fh.write(data)
except OSError:
ctx.log.error(f"{self.TAG}Failed to write to: {self.path}")
return data
def load(loader):
loader.add_option(
"save_streamed_data", typing.Optional[str], None,
"Format string for saving streamed data to files. If set each streamed request or response is written "
"to a file with a name derived from the string. In addition to formating supported by python "
"strftime() (using the request start time) the code '%+T' is replaced with the time stamp of the request, "
"'%+D' by 'req' or 'rsp' depending on the direction of the data, '%+C' by the client IP addresses and "
"'%+I' by the client connection ID."
)
def requestheaders(flow):
if ctx.options.save_streamed_data and flow.request.stream:
flow.request.stream = StreamSaver(flow, 'req')
def responseheaders(flow):
if isinstance(flow.request.stream, StreamSaver):
flow.request.stream.done()
if ctx.options.save_streamed_data and flow.response.stream:
flow.response.stream = StreamSaver(flow, 'rsp')
def response(flow):
if isinstance(flow.response.stream, StreamSaver):
flow.response.stream.done()
def error(flow):
if flow.request and isinstance(flow.request.stream, StreamSaver):
flow.request.stream.done()
if flow.response and isinstance(flow.response.stream, StreamSaver):
flow.response.stream.done()