From 244ef243d75145a01d9029589de65be51299b3f3 Mon Sep 17 00:00:00 2001 From: Krzysztof Bielicki Date: Tue, 10 Mar 2015 10:44:06 +0100 Subject: [PATCH] [#514] Add support for ignoring payload params in multipart/form-data --- libmproxy/console/contentview.py | 24 ++---------------------- libmproxy/flow.py | 2 +- libmproxy/protocol/http.py | 21 ++++++++++++++++++++- libmproxy/utils.py | 27 +++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 24 deletions(-) diff --git a/libmproxy/console/contentview.py b/libmproxy/console/contentview.py index 582723bb1..84e9946d9 100644 --- a/libmproxy/console/contentview.py +++ b/libmproxy/console/contentview.py @@ -210,33 +210,13 @@ class ViewMultipart: prompt = ("multipart", "m") content_types = ["multipart/form-data"] def __call__(self, hdrs, content, limit): - v = hdrs.get_first("content-type") + v = utils.multipartdecode(hdrs, content) if v: - v = utils.parse_content_type(v) - if not v: - return - boundary = v[2].get("boundary") - if not boundary: - return - - rx = re.compile(r'\bname="([^"]+)"') - keys = [] - vals = [] - - for i in content.split("--" + boundary): - parts = i.splitlines() - if len(parts) > 1 and parts[0][0:2] != "--": - match = rx.search(parts[1]) - if match: - keys.append(match.group(1) + ":") - vals.append(netlib.utils.cleanBin( - "\n".join(parts[3+parts[2:].index(""):]) - )) r = [ urwid.Text(("highlight", "Form data:\n")), ] r.extend(common.format_keyvals( - zip(keys, vals), + v, key = "header", val = "text" )) diff --git a/libmproxy/flow.py b/libmproxy/flow.py index 43580109c..0e9e481c8 100644 --- a/libmproxy/flow.py +++ b/libmproxy/flow.py @@ -236,7 +236,7 @@ class ServerPlaybackState: ] if not self.ignore_content: - form_contents = r.get_form_urlencoded() + form_contents = r.get_form() if self.ignore_payload_params and form_contents: key.extend( p for p in form_contents diff --git a/libmproxy/protocol/http.py b/libmproxy/protocol/http.py index 49310ec3f..512cf75b1 100644 --- a/libmproxy/protocol/http.py +++ b/libmproxy/protocol/http.py @@ -15,6 +15,7 @@ from ..proxy.connection import ServerConnection from .. import encoding, utils, controller, stateobject, proxy HDR_FORM_URLENCODED = "application/x-www-form-urlencoded" +HDR_FORM_MULTIPART = "multipart/form-data" CONTENT_MISSING = 0 @@ -507,6 +508,19 @@ class HTTPRequest(HTTPMessage): """ self.headers["Host"] = [self.host] + def get_form(self): + """ + Retrieves the URL-encoded or multipart form data, returning an ODict object. + Returns an empty ODict if there is no data or the content-type + indicates non-form data. + """ + if self.content: + if self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): + return self.get_form_urlencoded() + elif self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + return self.get_form_multipart() + return ODict([]) + def get_form_urlencoded(self): """ Retrieves the URL-encoded form data, returning an ODict object. @@ -514,7 +528,12 @@ class HTTPRequest(HTTPMessage): indicates non-form data. """ if self.content and self.headers.in_any("content-type", HDR_FORM_URLENCODED, True): - return ODict(utils.urldecode(self.content)) + return ODict(utils.urldecode(self.content)) + return ODict([]) + + def get_form_multipart(self): + if self.content and self.headers.in_any("content-type", HDR_FORM_MULTIPART, True): + return ODict(utils.multipartdecode(self.headers, self.content)) return ODict([]) def set_form_urlencoded(self, odict): diff --git a/libmproxy/utils.py b/libmproxy/utils.py index 51f2dc26a..b84c589a4 100644 --- a/libmproxy/utils.py +++ b/libmproxy/utils.py @@ -69,6 +69,33 @@ def urlencode(s): return urllib.urlencode(s, False) +def multipartdecode(hdrs, content): + """ + Takes a multipart boundary encoded string and returns list of (key, value) tuples. + """ + v = hdrs.get_first("content-type") + if v: + v = parse_content_type(v) + if not v: + return [] + boundary = v[2].get("boundary") + if not boundary: + return [] + + rx = re.compile(r'\bname="([^"]+)"') + r = [] + + for i in content.split("--" + boundary): + parts = i.splitlines() + if len(parts) > 1 and parts[0][0:2] != "--": + match = rx.search(parts[1]) + if match: + key = match.group(1) + value = "".join(parts[3+parts[2:].index(""):]) + r.append((key, value)) + return r + return [] + def pretty_size(size): suffixes = [ ("B", 2**10),