import logging import re, cStringIO, traceback, json import urwid try: from PIL import Image except ImportError: import Image try: from PIL.ExifTags import TAGS except ImportError: from ExifTags import TAGS import lxml.html, lxml.etree import netlib.utils import common from .. import utils, encoding, flow from ..contrib import jsbeautifier, html2text import subprocess try: import pyamf from pyamf import remoting, flex except ImportError: # pragma nocover pyamf = None try: import cssutils except ImportError: # pragma nocover cssutils = None else: cssutils.log.setLevel(logging.CRITICAL) cssutils.ser.prefs.keepComments = True cssutils.ser.prefs.omitLastSemicolon = False cssutils.ser.prefs.indentClosingBrace = False cssutils.ser.prefs.validOnly = False VIEW_CUTOFF = 1024*50 def _view_text(content, total, limit): """ Generates a body for a chunk of text. """ txt = [] for i in netlib.utils.cleanBin(content).splitlines(): txt.append( urwid.Text(("text", i), wrap="any") ) trailer(total, txt, limit) return txt def trailer(clen, txt, limit): rem = clen - limit if rem > 0: txt.append(urwid.Text("")) txt.append( urwid.Text( [ ("highlight", "... %s of data not shown. Press "%utils.pretty_size(rem)), ("key", "f"), ("highlight", " to load all data.") ] ) ) class ViewAuto: name = "Auto" prompt = ("auto", "a") content_types = [] def __call__(self, hdrs, content, limit): ctype = hdrs.get_first("content-type") if ctype: ct = utils.parse_content_type(ctype) if ctype else None ct = "%s/%s"%(ct[0], ct[1]) if ct in content_types_map: return content_types_map[ct][0](hdrs, content, limit) elif utils.isXML(content): return get("XML")(hdrs, content, limit) return get("Raw")(hdrs, content, limit) class ViewRaw: name = "Raw" prompt = ("raw", "r") content_types = [] def __call__(self, hdrs, content, limit): txt = _view_text(content[:limit], len(content), limit) return "Raw", txt class ViewHex: name = "Hex" prompt = ("hex", "e") content_types = [] def __call__(self, hdrs, content, limit): txt = [] for offset, hexa, s in netlib.utils.hexdump(content[:limit]): txt.append(urwid.Text([ ("offset", offset), " ", ("text", hexa), " ", ("text", s), ])) trailer(len(content), txt, limit) return "Hex", txt class ViewXML: name = "XML" prompt = ("xml", "x") content_types = ["text/xml"] def __call__(self, hdrs, content, limit): parser = lxml.etree.XMLParser(remove_blank_text=True, resolve_entities=False, strip_cdata=False, recover=False) try: document = lxml.etree.fromstring(content, parser) except lxml.etree.XMLSyntaxError: return None docinfo = document.getroottree().docinfo prev = [] p = document.getroottree().getroot().getprevious() while p is not None: prev.insert( 0, lxml.etree.tostring(p) ) p = p.getprevious() doctype=docinfo.doctype if prev: doctype += "\n".join(prev).strip() doctype = doctype.strip() s = lxml.etree.tostring( document, pretty_print=True, xml_declaration=True, doctype=doctype or None, encoding = docinfo.encoding ) txt = [] for i in s[:limit].strip().split("\n"): txt.append( urwid.Text(("text", i)), ) trailer(len(content), txt, limit) return "XML-like data", txt class ViewJSON: name = "JSON" prompt = ("json", "s") content_types = ["application/json"] def __call__(self, hdrs, content, limit): lines = utils.pretty_json(content) if lines: txt = [] sofar = 0 for i in lines: sofar += len(i) txt.append( urwid.Text(("text", i)), ) if sofar > limit: break trailer(sum(len(i) for i in lines), txt, limit) return "JSON", txt class ViewHTML: name = "HTML" prompt = ("html", "h") content_types = ["text/html"] def __call__(self, hdrs, content, limit): if utils.isXML(content): parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True) d = lxml.html.fromstring(content, parser=parser) docinfo = d.getroottree().docinfo s = lxml.etree.tostring(d, pretty_print=True, doctype=docinfo.doctype) return "HTML", _view_text(s[:limit], len(s), limit) class ViewHTMLOutline: name = "HTML Outline" prompt = ("html outline", "o") content_types = ["text/html"] def __call__(self, hdrs, content, limit): content = content.decode("utf-8") h = html2text.HTML2Text(baseurl="") h.ignore_images = True h.body_width = 0 content = h.handle(content) txt = _view_text(content[:limit], len(content), limit) return "HTML Outline", txt class ViewURLEncoded: name = "URL-encoded" prompt = ("urlencoded", "u") content_types = ["application/x-www-form-urlencoded"] def __call__(self, hdrs, content, limit): lines = utils.urldecode(content) if lines: body = common.format_keyvals( [(k+":", v) for (k, v) in lines], key = "header", val = "text" ) return "URLEncoded form", body class ViewMultipart: name = "Multipart Form" prompt = ("multipart", "m") content_types = ["multipart/form-data"] def __call__(self, hdrs, content, limit): v = hdrs.get_first("content-type") if v: v = utils.parse_content_type(v) if not v: return boundary = v[2].get("boundary") if not boundary: return rx = re.compile(r'\bname="([^"]+)"') keys = [] vals = [] for i in content.split("--" + boundary): parts = i.splitlines() if len(parts) > 1 and parts[0][0:2] != "--": match = rx.search(parts[1]) if match: keys.append(match.group(1) + ":") vals.append(netlib.utils.cleanBin( "\n".join(parts[3+parts[2:].index(""):]) )) r = [ urwid.Text(("highlight", "Form data:\n")), ] r.extend(common.format_keyvals( zip(keys, vals), key = "header", val = "text" )) return "Multipart form", r if pyamf: class DummyObject(dict): def __init__(self, alias): dict.__init__(self) def __readamf__(self, input): data = input.readObject() self["data"] = data def pyamf_class_loader(s): for i in pyamf.CLASS_LOADERS: if i != pyamf_class_loader: v = i(s) if v: return v return DummyObject pyamf.register_class_loader(pyamf_class_loader) class ViewAMF: name = "AMF" prompt = ("amf", "f") content_types = ["application/x-amf"] def unpack(self, b, seen=set([])): if hasattr(b, "body"): return self.unpack(b.body, seen) if isinstance(b, DummyObject): if id(b) in seen: return "" else: seen.add(id(b)) for k, v in b.items(): b[k] = self.unpack(v, seen) return b elif isinstance(b, dict): for k, v in b.items(): b[k] = self.unpack(v, seen) return b elif isinstance(b, list): return [self.unpack(i) for i in b] elif isinstance(b, flex.ArrayCollection): return [self.unpack(i, seen) for i in b] else: return b def __call__(self, hdrs, content, limit): envelope = remoting.decode(content, strict=False) if not envelope: return None txt = [] for target, message in iter(envelope): if isinstance(message, pyamf.remoting.Request): txt.append(urwid.Text([ ("header", "Request: "), ("text", str(target)), ])) else: txt.append(urwid.Text([ ("header", "Response: "), ("text", "%s, code %s"%(target, message.status)), ])) s = json.dumps(self.unpack(message), indent=4) txt.extend(_view_text(s[:limit], len(s), limit)) return "AMF v%s"%envelope.amfVersion, txt class ViewJavaScript: name = "JavaScript" prompt = ("javascript", "j") content_types = [ "application/x-javascript", "application/javascript", "text/javascript" ] def __call__(self, hdrs, content, limit): opts = jsbeautifier.default_options() opts.indent_size = 2 res = jsbeautifier.beautify(content[:limit], opts) return "JavaScript", _view_text(res, len(res), limit) class ViewCSS: name = "CSS" prompt = ("CSS", "c") content_types = [ "text/css" ] def __call__(self, hdrs, content, limit): if cssutils: sheet = cssutils.parseString(content) beautified = sheet.cssText else: beautified = content return "CSS", _view_text(beautified, len(beautified), limit) class ViewImage: name = "Image" prompt = ("image", "i") content_types = [ "image/png", "image/jpeg", "image/gif", "image/vnd.microsoft.icon", "image/x-icon", ] def __call__(self, hdrs, content, limit): try: img = Image.open(cStringIO.StringIO(content)) except IOError: return None parts = [ ("Format", str(img.format_description)), ("Size", "%s x %s px"%img.size), ("Mode", str(img.mode)), ] for i in sorted(img.info.keys()): if i != "exif": parts.append( (str(i), str(img.info[i])) ) if hasattr(img, "_getexif"): ex = img._getexif() if ex: for i in sorted(ex.keys()): tag = TAGS.get(i, i) parts.append( (str(tag), str(ex[i])) ) clean = [] for i in parts: clean.append([netlib.utils.cleanBin(i[0]), netlib.utils.cleanBin(i[1])]) fmt = common.format_keyvals( clean, key = "header", val = "text" ) return "%s image"%img.format, fmt class ViewProtobuf: """Human friendly view of protocol buffers The view uses the protoc compiler to decode the binary """ name = "Protocol Buffer" prompt = ("protobuf", "p") content_types = ["application/x-protobuf"] @staticmethod def is_available(): try: p = subprocess.Popen(["protoc", "--version"], stdout=subprocess.PIPE) out, _ = p.communicate() return out.startswith("libprotoc") except: return False def decode_protobuf(self, content): # if Popen raises OSError, it will be caught in # get_content_view and fall back to Raw p = subprocess.Popen(['protoc', '--decode_raw'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate(input=content) if out: return out else: return err def __call__(self, hdrs, content, limit): decoded = self.decode_protobuf(content) txt = _view_text(decoded[:limit], len(decoded), limit) return "Protobuf", txt views = [ ViewAuto(), ViewRaw(), ViewHex(), ViewJSON(), ViewXML(), ViewHTML(), ViewHTMLOutline(), ViewJavaScript(), ViewCSS(), ViewURLEncoded(), ViewMultipart(), ViewImage(), ] if pyamf: views.append(ViewAMF()) if ViewProtobuf.is_available(): views.append(ViewProtobuf()) content_types_map = {} for i in views: for ct in i.content_types: l = content_types_map.setdefault(ct, []) l.append(i) view_prompts = [i.prompt for i in views] def get_by_shortcut(c): for i in views: if i.prompt[1] == c: return i def get(name): for i in views: if i.name == name: return i def get_content_view(viewmode, hdrItems, content, limit, logfunc): """ Returns a (msg, body) tuple. """ if not content: return ("No content", "") msg = [] hdrs = flow.ODictCaseless([list(i) for i in hdrItems]) enc = hdrs.get_first("content-encoding") if enc and enc != "identity": decoded = encoding.decode(enc, content) if decoded: content = decoded msg.append("[decoded %s]"%enc) try: ret = viewmode(hdrs, content, limit) # Third-party viewers can fail in unexpected ways... except Exception, e: s = traceback.format_exc() s = "Content viewer failed: \n" + s logfunc(s) ret = None if not ret: ret = get("Raw")(hdrs, content, limit) msg.append("Couldn't parse: falling back to Raw") else: msg.append(ret[0]) return " ".join(msg), ret[1]