diff --git a/.gitignore b/.gitignore index 6e329a62b..782116d32 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ MANIFEST *.swo mitmproxyc mitmdumpc +mitmplaybackc +mitmrecordc diff --git a/README.mkd b/README.mkd index aab928774..52da3f90a 100644 --- a/README.mkd +++ b/README.mkd @@ -1,3 +1,14 @@ +This version is modified to add recording and playback capabilities enabling +mitmproxy to record a complete session and allow clients to play back the +same session against the recorded data. + +The store is enabled by using the --store=.. option to mitmproxy or by using +the noninteractive mitmrecord program. + +To play back a recorded session use the mitmplayback program. + +Original README follows: + __mitmproxy__ is an interactive SSL-capable intercepting HTTP proxy. It lets you to observe, modify and replay requests and responses on the fly. The underlying library that mitmproxy is built on can also be used to do these diff --git a/libmproxy/playback.py b/libmproxy/playback.py new file mode 100644 index 000000000..920b2e0c4 --- /dev/null +++ b/libmproxy/playback.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Alternatively you may use this file under a GPLv3 license as follows: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import controller +import utils +import proxy +import recorder + +class PlaybackMaster(controller.Master): + """ + A simple master that plays back recorded responses.
+ """ + def __init__(self, server, options): + self.verbosity = options.verbose + self.store = recorder.Recorder(options) + controller.Master.__init__(self, server) + + def run(self): + try: + return controller.Master.run(self) + except KeyboardInterrupt: + self.shutdown() + + def process_missing_response(self, request): + response = None + print >> sys.stderr, self.store.normalize_request(request).assemble_proxy() + print >> sys.stderr, "Actions:" + print >> sys.stderr, " q Quit" + print >> sys.stderr, " a(dd) Add pattern rule" + print >> sys.stderr, " A(dd) Add pattern rule (forced)" + print >> sys.stderr, " e(rror) respond with a 404 error" + print >> sys.stderr, " k(ill) kill the request, empty response" + print >> sys.stderr, " f(orward) forward the request to the requested server and cache response" + command = raw_input("Action: ") + command = command[:1] + if command == 'q': + self.shutdown() + return None + elif command == 'a' or command == 'A': + filt = raw_input("Filter: ") + search = raw_input("Search pattern: ") + replace = raw_input("Replacement string: ") + self.store.add_rule(filt, search, replace) + if command == 'A': + self.store.save_rule(filt, search, replace) + elif command == 'e': + return proxy.Response(request, "404", "Not found", utils.Headers(), "Not found") + elif command == 'k': + return None + elif command == 'f': + return request + else: + print >> sys.stderr, "ERROR: Unknown command" + return self.process_missing_response(request) + try: + response = self.store.get_response(request) + if command == 'a': + self.store.save_rule(filt, search, replace) + except proxy.ProxyError: + print >> sys.stderr, "ERROR: Malformed substitution rule" + self.store.forget_last_rule() + response = self.process_missing_response(request) + except IOError: + print >> sys.stderr, "NOTICE: Response still not found" + if command == 'a': + self.store.forget_last_rule() + response = self.process_missing_response(request) + return response + + def 
handle_request(self, msg): + request = msg + try: + response = self.store.get_response(request) + except IOError: + if self.verbosity > 0: + print >> sys.stderr, ">>", + print >> sys.stderr, request.short() + print >> sys.stderr, "<<", + print >> sys.stderr, "ERROR: No matching response.", + print >> sys.stderr, ",".join(self.store.cookies) + response = self.process_missing_response(msg) + msg.ack(response) + + def handle_response(self, msg): + request = msg.request + response = msg + if self.verbosity > 0: + print >> sys.stderr, ">>", + print >> sys.stderr, request.short() + print >> sys.stderr, "<<", + print >> sys.stderr, response.short() + if not response.is_cached(): + self.store.save_response(response) + msg.ack(self.store.filter_response(msg)) diff --git a/libmproxy/proxy.py b/libmproxy/proxy.py index e3eace3b9..631e24707 100644 --- a/libmproxy/proxy.py +++ b/libmproxy/proxy.py @@ -124,6 +124,7 @@ def parse_request_line(request): class Request(controller.Msg): FMT = '%s %s HTTP/1.1\r\n%s\r\n%s' + FMT_PROXY = '%s %s://%s:%s%s HTTP/1.1\r\n%s\r\n%s' def __init__(self, client_conn, host, port, scheme, method, path, headers, content, timestamp=None): self.client_conn = client_conn self.host, self.port, self.scheme = host, port, scheme @@ -132,6 +133,9 @@ class Request(controller.Msg): self.close = False controller.Msg.__init__(self) + def is_cached(self): + return False + def get_state(self): return dict( host = self.host, @@ -189,7 +193,10 @@ class Request(controller.Msg): def short(self): return "%s %s"%(self.method, self.url()) - def assemble(self): + def assemble_proxy(self): + return self.assemble(True) + + def assemble(self, _proxy = False): """ Assembles the request for transmission to the server. We make some modifications to make sure interception works properly. 
@@ -210,8 +217,10 @@ class Request(controller.Msg): content = "" if self.close: headers["connection"] = ["close"] - data = (self.method, self.path, str(headers), content) - return self.FMT%data + if not _proxy: + return self.FMT % (self.method, self.path, str(headers), content) + else: + return self.FMT_PROXY % (self.method, self.scheme, self.host, self.port, self.path, str(headers), content) class Response(controller.Msg): @@ -221,6 +230,7 @@ class Response(controller.Msg): self.code, self.msg = code, msg self.headers, self.content = headers, content self.timestamp = timestamp or time.time() + self.cached = False controller.Msg.__init__(self) def get_state(self): @@ -256,6 +266,9 @@ class Response(controller.Msg): def is_response(self): return True + def is_cached(self): + return self.cached + def short(self): return "%s %s"%(self.code, self.msg) diff --git a/libmproxy/record.py b/libmproxy/record.py new file mode 100644 index 000000000..d32c87116 --- /dev/null +++ b/libmproxy/record.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Alternatively you may use this file under a GPLv3 license as follows: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import controller +import utils +import recorder + +class RecordMaster(controller.Master): + """ + A simple master that just records to files.
+ """ + def __init__(self, server, options): + self.verbosity = options.verbose + self.store = recorder.Recorder(options) + controller.Master.__init__(self, server) + + def run(self): + try: + return controller.Master.run(self) + except KeyboardInterrupt: + self.shutdown() + + def handle_request(self, msg): + msg.ack(self.store.filter_request(msg)) + + def handle_response(self, msg): + if self.verbosity > 0: + print >> sys.stderr, ">>", + print >> sys.stderr, msg.request.short() + print >> sys.stderr, "<<", + print >> sys.stderr, msg.short() + self.store.save_response(msg) + msg.ack(self.store.filter_response(msg)) diff --git a/libmproxy/recorder.py b/libmproxy/recorder.py new file mode 100644 index 000000000..51c8a6e02 --- /dev/null +++ b/libmproxy/recorder.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# HENRIK NORDSTROM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +# Alternatively you may use this file under a GPLv3 license as follows: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import time +import hashlib +import utils +import proxy +import collections +import itertools +import string +import Cookie +import filt +import re +import cStringIO + +def constant_factory(value): + return itertools.repeat(value).next + +class PatternRule: + """ + Request pattern rule + :_ivar _match filt pattern rule + :_ivar _search Regex pattern to search for + :_ivar _replace Replacement string + """ + def __init__(self, pattern, search, replace): + self.match = filt.parse(pattern) + self.search = re.compile(search) + self.replace = replace + def execute(self, request, text): + if self.match and not self.match(request): + return text + return re.sub(self.search, self.replace, text) + +class RecorderConnection(proxy.ServerConnection): + """ + Simulated ServerConnection connecting to the cache + """ + # Note: This may change in future.
Division between Recorder + # and RecorderConnection is not yet finalized + def __init__(self, request, fp): + self.host = request.host + self.port = request.port + self.scheme = request.scheme + self.close = False + self.server = fp + self.rfile = fp + self.wfile = fp + + def send_request(self, request): + self.request = request + + def read_response(self): + response = proxy.ServerConnection.read_response(self) + response.cached = True + return response + +class Recorder: + """ + A simple record/playback cache + """ + def __init__(self, options): + self.sequence = collections.defaultdict(int) + self.cookies = {} + try: + for cookie in options.cookies: + self.cookies[cookie] = True + except AttributeError: pass + self.verbosity = options.verbose + self.storedir = options.cache + self.patterns = [] + self.indexfp = None + self.reset_config() + + def reset_config(self): + self.patterns = [] + self.load_config("default") + + def add_rule(self, match, search, replace): + self.patterns.append(PatternRule(match, search, replace)) + + def forget_last_rule(self): + self.patterns.pop() + + def save_rule(self, match, search, replace, configfile = "default"): + fp = self.open(configfile + ".cfg", "a") + print >> fp, "Condition: " + match + print >> fp, "Search: " + search + print >> fp, "Replace: " + replace + fp.close() + + def load_config(self, name): + """ + Load configuration settings from name + """ + try: + file = name + ".cfg" + if self.verbosity > 2: + print >> sys.stderr, "config: " + file + fp = self.open(file, "r") + except IOError: + return False + for line in fp: + directive, value = line.split(" ", 1) + value = value.strip("\r\n") + if directive == "Cookie:": + self.cookies[value] = True + if directive == "Condition:": + match = value + if directive == "Search:": + search = value + if directive == "Replace:": + self.add_rule(match, search, value) + fp.close() + return True + + def filter_request(self, request): + """ + Filter forwarded requests to enable better 
recording + """ + request = request.copy() + headers = request.headers + utils.try_del(headers, 'if-modified-since') + utils.try_del(headers, 'if-none-match') + return request + + def normalize_request(self, request): + """ + Filter request to simplify storage matching + """ + request.close = False + req_text = request.assemble_proxy() + orig_req_text = req_text + for pattern in self.patterns: + req_text = pattern.execute(request, req_text) + if req_text == orig_req_text: + return request + fp = cStringIO.StringIO(req_text) + request_line = fp.readline() + method, scheme, host, port, path, httpminor = proxy.parse_request_line(request_line) + headers = utils.Headers() + headers.read(fp) + if request.content is None: + content = None + else: + content = fp.read() + return proxy.Request(request.client_conn, host, port, scheme, method, path, headers, content) + + def open(self, path, mode): + return open(self.storedir + "/" + path, mode) + + def pathn(self, request): + """ + Create cache file name and sequence number + """ + request = self.normalize_request(request) + request = self.filter_request(request) + headers = request.headers + urlkey = (request.host + request.path)[:80].translate(string.maketrans(":/?","__.")) + id = "" + if headers.has_key("cookie"): + cookies = Cookie.SimpleCookie("; ".join(headers["cookie"])) + del headers["cookie"] + for key, morsel in cookies.iteritems(): + if self.cookies.has_key(key): + id = id + key + "=" + morsel.value + "\n" + if self.verbosity > 1: + print >> sys.stderr, "ID: " + id + m = hashlib.sha224(id) + req_text = request.assemble_proxy() + if self.verbosity > 2: + print >> sys.stderr, req_text + m.update(req_text) + path = urlkey+"."+m.hexdigest() + n = str(self.sequence[path]) + if self.verbosity > 1: + print >> sys.stderr, "PATH: " + path + "." 
+ n + return path, n + + def filter_response(self, response): + if response.headers.has_key('set-cookie'): + for header in response.headers['set-cookie']: + key = header.split('=',1)[0] + self.cookies[key] = True + return response + + def save_response(self, response): + """ + Save response for later playback + """ + + if self.indexfp is None: + self.indexfp = self.open("index.txt", "a") + try: + cfg = self.open("default.cfg", "r") + except: + cfg = self.open("default.cfg", "w") + for cookie in iter(self.cookies): + print >> cfg, "Cookie: " + cookie + cfg.close() + request = response.request + req_text = request.assemble_proxy() + resp_text = response.assemble() + path, n = self.pathn(request) + self.sequence[path] += 1 + + f = self.open(path+"."+n+".req", 'w') + f.write(req_text) + f.close() + f = self.open(path+"."+n+".resp", 'w') + f.write(resp_text) + f.close() + + print >> self.indexfp , time.time(), request.method, request.path + if request.headers.has_key('referer'): + print >> self.indexfp, 'referer:', ','.join(request.headers['referer']) + if len(self.cookies) > 0: + print >> self.indexfp, 'cookies:', ','.join(self.cookies) + print >> self.indexfp , path + print >> self.indexfp , "" + + + def get_response(self, request): + """ + Retrieve previously saved response saved by save_response + """ + path, n = self.pathn(request) + try: + fp = self.open(path+"."+n+".resp", 'r') + self.sequence[path]+=1 + except IOError: + fp = self.open(path+".resp", 'r') + server = RecorderConnection(request, fp) + fp = None # Handed over to RecorderConnection + server.send_request(request) + response = server.read_response() + server.terminate() + return response diff --git a/mitmplayback b/mitmplayback new file mode 100755 index 000000000..ea8020945 --- /dev/null +++ b/mitmplayback @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom +# +# Based on mitmproxy mitmdump +# Copyright (C) 2010 Aldo Cortesi +# +# This program is free software: you can 
redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys, os.path +from libmproxy import proxy, controller, playback, utils +from libmproxy import VERSION +from optparse import OptionParser, OptionGroup + + +if __name__ == '__main__': + parser = OptionParser( + usage = "%prog [options] output", + version="%%prog %s"%VERSION, + ) + + parser.add_option( + "-c", "--cert", action="store", + type = "str", dest="cert", default="~/.mitmproxy/cert.pem", + help = "SSL certificate file." + ) + + parser.add_option( + "-p", "--port", action="store", + type = "int", dest="port", default=8080, + help = "Port." + ) + + parser.add_option( + "-s", "--store", action="store", + type = "str", dest="cache", default="cache/", + help = "Session store location" + ) + + parser.add_option("-q", "--quiet", + action="store_true", dest="quiet", + help="Quiet.") + parser.add_option("-v", "--verbose", + action="count", dest="verbose", default=1, + help="Increase verbosity.
Can be passed multiple times.") + + options, args = parser.parse_args() + + if options.quiet: + options.verbose = 0 + + certpath = os.path.expanduser(options.cert) + options.cache = os.path.expanduser(options.cache) + + if not os.path.exists(certpath): + print >> sys.stderr, "Creating bogus certificate at %s"%options.cert + utils.make_bogus_cert(certpath) + + proxy.config = proxy.Config( + certpath + ) + server = proxy.ProxyServer(options.port) + m = playback.PlaybackMaster(server, options) + m.run() diff --git a/mitmrecord b/mitmrecord new file mode 100755 index 000000000..0fcb91c2c --- /dev/null +++ b/mitmrecord @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Henrik Nordstrom +# +# Based on mitmproxy and mitmdump +# Copyright (C) 2010 Aldo Cortesi +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys, os.path, os, errno +from libmproxy import proxy, controller, record, utils +from libmproxy import VERSION +from optparse import OptionParser, OptionGroup + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST: + pass + else: + raise + +if __name__ == '__main__': + parser = OptionParser( + usage = "%prog [options] output", + version="%%prog %s"%VERSION, + ) + + parser.add_option( + "-c", "--cert", action="store", + type = "str", dest="cert", default="~/.mitmproxy/cert.pem", + help = "SSL certificate file."
+ ) + + parser.add_option( + "-p", "--port", action="store", + type = "int", dest="port", default=8080, + help = "Port." + ) + + parser.add_option( + "-s", "--store", action="store", + type = "str", dest="cache", default="cache/", + help = "Session store location" + ) + + parser.add_option( + "-C", "--cookies", action="append", + type = "str", dest="cookies", default=[], + help = "Persistent client cookies already set or generated in client" + ) + + parser.add_option("-q", "--quiet", + action="store_true", dest="quiet", + help="Quiet.") + parser.add_option("-v", "--verbose", + action="count", dest="verbose", default=1, + help="Increase verbosity. Can be passed multiple times.") + + options, args = parser.parse_args() + + if options.quiet: + options.verbose = 0 + + certpath = os.path.expanduser(options.cert) + options.cache = os.path.expanduser(options.cache) + + if not os.path.exists(certpath): + print >> sys.stderr, "Creating bogus certificate at %s"%options.cert + utils.make_bogus_cert(certpath) + + proxy.config = proxy.Config( + certpath + ) + server = proxy.ProxyServer(options.port) + mkdir_p(options.cache) + if os.path.exists(options.cache + "/index.txt"): + print >> sys.stderr, "ERROR: data already recorded in %s"%options.cache + sys.exit(1) + m = record.RecordMaster(server, options) + m.run() diff --git a/setup.py b/setup.py index d1abd0928..71107b2ff 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ setup( url = "http://corte.si/software", packages = packages, package_data = package_data, - scripts = ["mitmproxy", "mitmdump"], + scripts = ["mitmproxy", "mitmdump", "mitmrecord", "mitmplayback"], classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python",