mirror of
https://github.com/Grasscutters/mitmproxy.git
synced 2024-11-21 22:58:24 +00:00
Merge pull request #5099 from EndUser509/save_streamed_data2
Save streamed data
This commit is contained in:
parent
6ec97d0b8e
commit
a0cf273484
@ -24,6 +24,7 @@
|
||||
* Fix random connection stalls (#5040, @EndUser509)
|
||||
* Add `n` new flow keybind to mitmweb (#5061, @ianklatzco)
|
||||
* Fix compatibility with BoringSSL (@pmoulton)
|
||||
* Add example addon for saving streamed data to individual files (@EndUser509)
|
||||
* Change connection event hooks to be blocking.
|
||||
Processing will only resume once the event hook has finished. (@Prinzhorn)
|
||||
* Allow addon hooks to be async (@nneonneo, #4207)
|
||||
|
120
examples/contrib/save_streamed_data.py
Normal file
120
examples/contrib/save_streamed_data.py
Normal file
@ -0,0 +1,120 @@
|
||||
"""
|
||||
Save streamed requests and responses
|
||||
|
||||
If the option 'save_streamed_data' is set to a format string then
|
||||
streamed requests and responses are written to individual files with a name
|
||||
derived from the string. Apart from Python strftime() formatting (using the
|
||||
request start time) the following codes can also be used:
|
||||
- %+T: The time stamp of the request with microseconds
|
||||
- %+D: 'req' or 'rsp' indicating the direction of the data
|
||||
- %+I: The client connection ID
|
||||
- %+C: The client IP address
|
||||
A good starting point for a template could be '~/streamed_files/%+D:%+T:%+I',
|
||||
a more complex example is '~/streamed_files/%+C/%Y-%m-%d/%+D:%+T:%+I'.
|
||||
The client connection ID combined with the request time stamp should be unique
|
||||
for associating a file with its corresponding flow in the stream saved with
|
||||
'--save-stream-file'.
|
||||
|
||||
This addon is not compatible with addons that use the same mechanism to
|
||||
capture streamed data, http-stream-modify.py for instance.
|
||||
"""
|
||||
from mitmproxy import ctx
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import os
|
||||
import typing
|
||||
|
||||
|
||||
class StreamSaver:
    """Stream callable that copies every streamed chunk into a file.

    An instance is installed as ``flow.request.stream`` or
    ``flow.response.stream``. It is called with each chunk of body data and
    always returns the chunk unchanged. The output file is created when the
    first non-empty chunk arrives; its name is derived from the
    'save_streamed_data' option.
    """

    TAG = "save_streamed_data: "

    # Template codes handled by this addon rather than by strftime().
    _CUSTOM_CODES = ("%+T", "%+I", "%+D", "%+C")

    def __init__(self, flow, direction):
        """Remember the flow and the direction tag used for '%+D'."""
        self.flow = flow
        self.direction = direction
        self.fh = None
        self.path = None
        # Set after the first failed open/write. Retrying on later chunks
        # would produce a file that silently misses part of the body.
        self._failed = False

    def done(self):
        """Close the output file (if any) and drop the reference to the flow."""
        if self.fh:
            self.fh.close()
            self.fh = None
        # Make sure we have no circular references
        self.flow = None

    @classmethod
    def _escape_custom_codes(cls, template):
        """Return *template* with the custom '%+X' codes escaped as '%%+X'.

        strftime() hands the format to the platform C library, where '%+'
        may be expanded or rejected. Escaping makes strftime() emit the
        codes verbatim so they can be substituted afterwards. Existing '%%'
        pairs are left untouched.
        """
        pieces = template.split("%%")
        for code in cls._CUSTOM_CODES:
            pieces = [piece.replace(code, "%" + code) for piece in pieces]
        return "%%".join(pieces)

    def _expand_path(self):
        """Build the output file name from the option template and the flow."""
        request = self.flow.request
        template = self._escape_custom_codes(ctx.options.save_streamed_data)
        path = datetime.fromtimestamp(request.timestamp_start).strftime(template)
        path = path.replace('%+T', str(request.timestamp_start))
        path = path.replace('%+I', str(self.flow.client_conn.id))
        path = path.replace('%+D', self.direction)
        path = path.replace('%+C', self.flow.client_conn.address[0])
        return os.path.expanduser(path)

    def _open(self):
        """Create the parent directory and open the output file for writing.

        Failures are logged, not raised. On a failed open the saver is
        disabled for the remainder of the stream.
        """
        self.path = self._expand_path()

        parent = Path(self.path).parent
        try:
            # exist_ok=True already covers the "directory exists" case.
            parent.mkdir(parents=True, exist_ok=True)
        except OSError:
            ctx.log.error(f"{self.TAG}Failed to create directory: {parent}")

        try:
            self.fh = open(self.path, "wb", buffering=0)
        except OSError:
            ctx.log.error(f"{self.TAG}Failed to open for writing: {self.path}")
            self._failed = True

    def __call__(self, data):
        """Write *data* to the output file and return it unmodified.

        An empty chunk marks the end of the stream and closes the file.
        """
        # End of stream?
        if not data:
            self.done()
            return data

        # A previous open/write failed, or the option was cleared while the
        # stream is in flight: pass the data through untouched.
        if self._failed or not ctx.options.save_streamed_data:
            return data

        # This is a safeguard but should not be needed
        if not self.flow or not self.flow.request:
            return data

        if not self.fh:
            self._open()

        if self.fh:
            try:
                self.fh.write(data)
            except OSError:
                ctx.log.error(f"{self.TAG}Failed to write to: {self.path}")
                # Stop saving; appending later chunks would leave a gap.
                # The file handle is closed by done() at end of stream.
                self._failed = True

        return data
|
||||
|
||||
|
||||
def load(loader):
    """Register the 'save_streamed_data' option with the addon loader."""
    help_text = "".join((
        "Format string for saving streamed data to files. If set each streamed request or response is written ",
        "to a file with a name derived from the string. In addition to formating supported by python ",
        "strftime() (using the request start time) the code '%+T' is replaced with the time stamp of the request, ",
        "'%+D' by 'req' or 'rsp' depending on the direction of the data, '%+C' by the client IP addresses and ",
        "'%+I' by the client connection ID.",
    ))
    # Optional string option that defaults to None, i.e. saving is disabled.
    loader.add_option("save_streamed_data", typing.Optional[str], None, help_text)
|
||||
|
||||
|
||||
def requestheaders(flow):
    """Wrap a streamed request body in a StreamSaver when saving is enabled."""
    if not ctx.options.save_streamed_data:
        return
    # Only requests that are already marked for streaming are captured.
    if flow.request.stream:
        flow.request.stream = StreamSaver(flow, 'req')
|
||||
|
||||
|
||||
def responseheaders(flow):
    """Finish the request capture and start capturing a streamed response."""
    request_stream = flow.request.stream
    if isinstance(request_stream, StreamSaver):
        # Close the request file before any response data is handled.
        request_stream.done()

    if not ctx.options.save_streamed_data:
        return
    if flow.response.stream:
        flow.response.stream = StreamSaver(flow, 'rsp')
|
||||
|
||||
|
||||
def response(flow):
    """Close the response capture file when the response hook fires."""
    response_stream = flow.response.stream
    if not isinstance(response_stream, StreamSaver):
        return
    response_stream.done()
|
||||
|
||||
|
||||
def error(flow):
    """Close any capture files still attached to the flow after an error."""
    # Either message may be missing, so each one is checked before its stream.
    request = flow.request
    if request:
        if isinstance(request.stream, StreamSaver):
            request.stream.done()

    response_message = flow.response
    if response_message:
        if isinstance(response_message.stream, StreamSaver):
            response_message.stream.done()
|
Loading…
Reference in New Issue
Block a user