Pretty view now indents Javascript.

Thanks to the JSBeautifier project, which is now included in the contrib directory.
This commit is contained in:
Aldo Cortesi 2012-03-25 10:56:45 +13:00
parent 74c51df580
commit 2240d2a6a5
12 changed files with 1607 additions and 6 deletions

View File

@ -2,6 +2,7 @@ import re
import urwid
import common
from .. import utils, encoding, flow
from ..contrib import jsbeautifier
VIEW_CUTOFF = 1024*100
@ -20,12 +21,14 @@ VIEW_CONTENT_PRETTY_TYPE_JSON = 1
VIEW_CONTENT_PRETTY_TYPE_XML = 2
VIEW_CONTENT_PRETTY_TYPE_URLENCODED = 3
VIEW_CONTENT_PRETTY_TYPE_MULTIPART = 4
VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT = 5
CONTENT_PRETTY_NAMES = {
VIEW_CONTENT_PRETTY_TYPE_JSON: "JSON",
VIEW_CONTENT_PRETTY_TYPE_XML: "XML",
VIEW_CONTENT_PRETTY_TYPE_URLENCODED: "URL-encoded",
VIEW_CONTENT_PRETTY_TYPE_MULTIPART: "Multipart Form"
VIEW_CONTENT_PRETTY_TYPE_MULTIPART: "Multipart Form",
VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT: "JavaScript",
}
CONTENT_TYPES_MAP = {
@ -34,6 +37,8 @@ CONTENT_TYPES_MAP = {
"text/xml": VIEW_CONTENT_PRETTY_TYPE_XML,
"multipart/form-data": VIEW_CONTENT_PRETTY_TYPE_MULTIPART,
"application/x-www-form-urlencoded": VIEW_CONTENT_PRETTY_TYPE_URLENCODED,
"application/x-javascript": VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT,
"application/javascript": VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT,
}
def trailer(clen, txt):
@ -49,13 +54,21 @@ def trailer(clen, txt):
)
def view_raw(hdrs, content):
def _view_text(content):
"""
Generates a body for a chunk of text.
"""
txt = []
for i in utils.cleanBin(content[:VIEW_CUTOFF]).splitlines():
txt.append(
urwid.Text(("text", i))
)
trailer(len(content), txt)
return txt
def view_raw(hdrs, content):
txt = _view_text(content)
return "Raw", txt
@ -144,11 +157,19 @@ def view_urlencoded(hdrs, content):
return "URLEncoded form", body
def view_javascript(hdrs, content):
opts = jsbeautifier.default_options()
opts.indent_size = 2
res = jsbeautifier.beautify(content, opts)
return "JavaScript", _view_text(res)
PRETTY_FUNCTION_MAP = {
VIEW_CONTENT_PRETTY_TYPE_XML: view_xmlish,
VIEW_CONTENT_PRETTY_TYPE_JSON: view_json,
VIEW_CONTENT_PRETTY_TYPE_URLENCODED: view_urlencoded,
VIEW_CONTENT_PRETTY_TYPE_MULTIPART: view_multipart,
VIEW_CONTENT_PRETTY_TYPE_JAVASCRIPT: view_javascript,
}
def get_view_func(viewmode, pretty_type, hdrs, content):

10
libmproxy/contrib/README Normal file
View File

@ -0,0 +1,10 @@
Contribs:
pyparsing 1.5.2, MIT license
jsbeautifier, git checkout 25/03/12, MIT license
- Removed test directories
- Disabled packers through a single-line modification (see "# CORTESI"
comment)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,25 @@
# UNPACKERS SPECIFICATIONS
Nothing very difficult: an unpacker is a submodule placed in the directory
where this file was found. Each unpacker must define three symbols:
* `PRIORITY` : integer number expressing the priority in applying this
unpacker. Lower number means higher priority.
Makes sense only if a source file has been packed with
more than one packer.
* `detect(source)` : returns `True` if source is packed, otherwise, `False`.
* `unpack(source)` : takes a `source` string and unpacks it. Must always return
valid JavaScript. That is to say, your code should look
like:
```
if detect(source):
return do_your_fancy_things_with(source)
else:
return source
```
*You can safely define any other symbol in your module, as it will be ignored.*
`__init__` code will automatically load new unpackers, without any further step
to be accomplished. Simply drop it in this directory.

View File

@ -0,0 +1,67 @@
#
# General code for JSBeautifier unpackers infrastructure. See README.specs
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
"""General code for JSBeautifier unpackers infrastructure."""
import pkgutil
import re
from jsbeautifier.unpackers import evalbased
# NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js!
BLACKLIST = ['jsbeautifier.unpackers.evalbased']
class UnpackingError(Exception):
"""Badly packed source or general error. Argument is a
meaningful description."""
pass
def getunpackers():
"""Scans the unpackers dir, finds unpackers and add them to UNPACKERS list.
An unpacker will be loaded only if it is a valid python module (name must
adhere to naming conventions) and it is not blacklisted (i.e. inserted
into BLACKLIST."""
path = __path__
prefix = __name__ + '.'
unpackers = []
interface = ['unpack', 'detect', 'PRIORITY']
for _importer, modname, _ispkg in pkgutil.iter_modules(path, prefix):
if 'tests' not in modname and modname not in BLACKLIST:
try:
module = __import__(modname, fromlist=interface)
except ImportError:
raise UnpackingError('Bad unpacker: %s' % modname)
else:
unpackers.append(module)
return sorted(unpackers, key = lambda mod: mod.PRIORITY)
UNPACKERS = getunpackers()
def run(source, evalcode=False):
"""Runs the applicable unpackers and return unpacked source as a string."""
for unpacker in [mod for mod in UNPACKERS if mod.detect(source)]:
source = unpacker.unpack(source)
if evalcode and evalbased.detect(source):
source = evalbased.unpack(source)
return source
def filtercomments(source):
"""NOT USED: strips trailing comments and put them at the top."""
trailing_comments = []
comment = True
while comment:
if re.search(r'^\s*\/\*', source):
comment = source[0, source.index('*/') + 2]
elif re.search(r'^\s*\/\/', source):
comment = re.search(r'^\s*\/\/', source).group(0)
else:
comment = None
if comment:
source = re.sub(r'^\s+', '', source[len(comment):])
trailing_comments.append(comment)
return '\n'.join(trailing_comments) + source

View File

@ -0,0 +1,39 @@
#
# Unpacker for eval() based packers, a part of javascript beautifier
# by Einar Lielmanis <einar@jsbeautifier.org>
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# usage:
#
# if detect(some_string):
# unpacked = unpack(some_string)
#
"""Unpacker for eval() based packers: runs JS code and returns result.
Works only if a JS interpreter (e.g. Mozilla's Rhino) is installed and
properly set up on host."""
from subprocess import PIPE, Popen
PRIORITY = 3
def detect(source):
"""Detects if source is likely to be eval() packed."""
return source.strip().lower().startswith('eval(function(')
def unpack(source):
"""Runs source and return resulting code."""
return jseval('print %s;' % source[4:]) if detect(source) else source
# In case of failure, we'll just return the original, without crashing on user.
def jseval(script):
"""Run code in the JS interpreter and return output."""
try:
interpreter = Popen(['js'], stdin=PIPE, stdout=PIPE)
except OSError:
return script
result, errors = interpreter.communicate(script)
if interpreter.poll() or errors:
return script
return result

View File

@ -0,0 +1,58 @@
#
# simple unpacker/deobfuscator for scripts messed up with
# javascriptobfuscator.com
#
# written by Einar Lielmanis <einar@jsbeautifier.org>
# rewritten in Python by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# Will always return valid javascript: if `detect()` is false, `code` is
# returned, unmodified.
#
# usage:
#
# if javascriptobfuscator.detect(some_string):
# some_string = javascriptobfuscator.unpack(some_string)
#
"""deobfuscator for scripts messed up with JavascriptObfuscator.com"""
import re
PRIORITY = 1
def smartsplit(code):
"""Split `code` at " symbol, only if it is not escaped."""
strings = []
pos = 0
while pos < len(code):
if code[pos] == '"':
word = '' # new word
pos += 1
while pos < len(code):
if code[pos] == '"':
break
if code[pos] == '\\':
word += '\\'
pos += 1
word += code[pos]
pos += 1
strings.append('"%s"' % word)
pos += 1
return strings
def detect(code):
"""Detects if `code` is JavascriptObfuscator.com packed."""
# prefer `is not` idiom, so that a true boolean is returned
return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None)
def unpack(code):
"""Unpacks JavascriptObfuscator.com packed code."""
if detect(code):
matches = re.search(r'var (_0x[a-f\d]+) ?\= ?\[(.*?)\];', code)
if matches:
variable = matches.group(1)
dictionary = smartsplit(matches.group(2))
code = code[len(matches.group(0)):]
for key, value in enumerate(dictionary):
code = code.replace(r'%s[%s]' % (variable, key), value)
return code

View File

@ -0,0 +1,86 @@
#
# deobfuscator for scripts messed up with myobfuscate.com
# by Einar Lielmanis <einar@jsbeautifier.org>
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# usage:
#
# if detect(some_string):
# unpacked = unpack(some_string)
#
# CAVEAT by Einar Lielmanis
#
# You really don't want to obfuscate your scripts there: they're tracking
# your unpackings, your script gets turned into something like this,
# as of 2011-08-26:
#
# var _escape = 'your_script_escaped';
# var _111 = document.createElement('script');
# _111.src = 'http://api.www.myobfuscate.com/?getsrc=ok' +
# '&ref=' + encodeURIComponent(document.referrer) +
# '&url=' + encodeURIComponent(document.URL);
# var 000 = document.getElementsByTagName('head')[0];
# 000.appendChild(_111);
# document.write(unescape(_escape));
#
"""Deobfuscator for scripts messed up with MyObfuscate.com"""
import re
import base64
# Python 2 retrocompatibility
# pylint: disable=F0401
# pylint: disable=E0611
try:
from urllib import unquote
except ImportError:
from urllib.parse import unquote
from jsbeautifier.unpackers import UnpackingError
PRIORITY = 1
CAVEAT = """//
// Unpacker warning: be careful when using myobfuscate.com for your projects:
// scripts obfuscated by the free online version call back home.
//
"""
SIGNATURE = (r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F'
r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65'
r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75'
r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B'
r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78'
r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","'
r'\x6C\x65\x6E\x67\x74\x68"]')
def detect(source):
"""Detects MyObfuscate.com packer."""
return SIGNATURE in source
def unpack(source):
"""Unpacks js code packed with MyObfuscate.com"""
if not detect(source):
return source
payload = unquote(_filter(source))
match = re.search(r"^var _escape\='<script>(.*)<\/script>'",
payload, re.DOTALL)
polished = match.group(1) if match else source
return CAVEAT + polished
def _filter(source):
"""Extracts and decode payload (original file) from `source`"""
try:
varname = re.search(r'eval\(\w+\(\w+\((\w+)\)\)\);', source).group(1)
reverse = re.search(r"var +%s *\= *'(.*)';" % varname, source).group(1)
except AttributeError:
raise UnpackingError('Malformed MyObfuscate data.')
try:
return base64.b64decode(reverse[::-1].encode('utf8')).decode('utf8')
except TypeError:
raise UnpackingError('MyObfuscate payload is not base64-encoded.')

View File

@ -0,0 +1,104 @@
#
# Unpacker for Dean Edward's p.a.c.k.e.r, a part of javascript beautifier
# by Einar Lielmanis <einar@jsbeautifier.org>
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# usage:
#
# if detect(some_string):
# unpacked = unpack(some_string)
#
"""Unpacker for Dean Edward's p.a.c.k.e.r"""
import re
import string
from jsbeautifier.unpackers import UnpackingError
PRIORITY = 1
def detect(source):
"""Detects whether `source` is P.A.C.K.E.R. coded."""
return source.replace(' ', '').startswith('eval(function(p,a,c,k,e,r')
def unpack(source):
"""Unpacks P.A.C.K.E.R. packed js code."""
payload, symtab, radix, count = _filterargs(source)
if count != len(symtab):
raise UnpackingError('Malformed p.a.c.k.e.r. symtab.')
try:
unbase = Unbaser(radix)
except TypeError:
raise UnpackingError('Unknown p.a.c.k.e.r. encoding.')
def lookup(match):
"""Look up symbols in the synthetic symtab."""
word = match.group(0)
return symtab[unbase(word)] or word
source = re.sub(r'\b\w+\b', lookup, payload)
return _replacestrings(source)
def _filterargs(source):
"""Juice from a source file the four args needed by decoder."""
argsregex = (r"}\('(.*)', *(\d+), *(\d+), *'(.*)'\."
r"split\('\|'\), *(\d+), *(.*)\)\)")
args = re.search(argsregex, source, re.DOTALL).groups()
try:
return args[0], args[3].split('|'), int(args[1]), int(args[2])
except ValueError:
raise UnpackingError('Corrupted p.a.c.k.e.r. data.')
def _replacestrings(source):
"""Strip string lookup table (list) and replace values in source."""
match = re.search(r'var *(_\w+)\=\["(.*?)"\];', source, re.DOTALL)
if match:
varname, strings = match.groups()
startpoint = len(match.group(0))
lookup = strings.split('","')
variable = '%s[%%d]' % varname
for index, value in enumerate(lookup):
source = source.replace(variable % index, '"%s"' % value)
return source[startpoint:]
return source
class Unbaser(object):
"""Functor for a given base. Will efficiently convert
strings to natural numbers."""
ALPHABET = {
62 : '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
95 : (' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
}
def __init__(self, base):
self.base = base
# If base can be handled by int() builtin, let it do it for us
if 2 <= base <= 36:
self.unbase = lambda string: int(string, base)
else:
# Build conversion dictionary cache
try:
self.dictionary = dict((cipher, index) for
index, cipher in enumerate(self.ALPHABET[base]))
except KeyError:
raise TypeError('Unsupported base encoding.')
self.unbase = self._dictunbaser
def __call__(self, string):
return self.unbase(string)
def _dictunbaser(self, string):
"""Decodes a value to an integer."""
ret = 0
for index, cipher in enumerate(string[::-1]):
ret += (self.base ** index) * self.dictionary[cipher]
return ret

View File

@ -0,0 +1,34 @@
#
# Trivial bookmarklet/escaped script detector for the javascript beautifier
# written by Einar Lielmanis <einar@jsbeautifier.org>
# rewritten in Python by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# Will always return valid javascript: if `detect()` is false, `code` is
# returned, unmodified.
#
# usage:
#
# some_string = urlencode.unpack(some_string)
#
"""Bookmarklet/escaped script unpacker."""
# Python 2 retrocompatibility
# pylint: disable=F0401
# pylint: disable=E0611
try:
from urllib import unquote_plus
except ImportError:
from urllib.parse import unquote_plus
PRIORITY = 0
def detect(code):
"""Detects if a scriptlet is urlencoded."""
# the fact that script doesn't contain any space, but has %20 instead
# should be sufficient check for now.
return ' ' not in code and ('%20' in code or code.count('%') > 3)
def unpack(code):
"""URL decode `code` source string."""
return unquote_plus(code) if detect(code) else code

View File

@ -58,10 +58,9 @@ def cleanBin(s):
parts = []
for i in s:
o = ord(i)
if o > 31 and o < 127:
if (o > 31 and o < 127) or i in "\n\r\t":
parts.append(i)
else:
if i not in "\n\r\t":
parts.append(".")
return "".join(parts)

View File

@ -75,6 +75,11 @@ class uContentView(libpry.AutoTree):
def test_view_raw(self):
assert cv.view_raw([], "foo")
def test_view_javascript(self):
assert cv.view_javascript([], "[1, 2, 3]")
assert cv.view_javascript([], "[1, 2, 3")
assert cv.view_javascript([], "function(a){[1, 2, 3]}")
def test_view_raw(self):
assert cv.view_hex([], "foo")