Created a lexer for the command bar

Henrique 2019-11-12 21:27:02 -05:00
parent b321e07279
commit 561415cea9
2 changed files with 159 additions and 50 deletions


@@ -11,49 +11,9 @@ import functools
 import sys
 from mitmproxy import exceptions
+from mitmproxy import lexer
 import mitmproxy.types
 
 
-def escape_and_quote(value):
-    """
-    This function takes the output from the lexer and puts it between quotes
-    in the following cases:
-
-    * There is a space in the string: the only way a token coming out of the
-      lexer can contain a space is if that token was quoted.
-    * There is an unescaped quote in the middle of the string: the only way a
-      token can contain an unescaped quote is if the quote was escaped before
-      the lexer processed it. For example, the string `"s1 \" s2"` comes back
-      from the lexer as `s1 " s2`.
-
-    Any unescaped quote in the middle of the string is also escaped by placing
-    a \ in front of it.
-
-    This function only deals with double quotes; they are the only ones that
-    should be used.
-    """
-    new_value = ""
-    last_pos = len(value) - 1
-    for pos, char in enumerate(value):
-        if pos == 0:
-            new_value += char
-            continue
-
-        # if pos == last_pos:
-        #     new_value += char
-        #     break
-
-        if char in " \n\r\t":
-            new_value += char
-            continue
-
-        if char == '"':
-            if value[pos-1] != '\\':
-                new_value += '\\'
-
-        new_value += char
-
-    value = new_value
-
-    if ((" " in value) or ('"' in value)) and not (value.startswith("\"") or value.startswith("'")):
-        return "\"%s\"" % value
-
-    return value
-
-
 def verify_arg_signature(f: typing.Callable, args: list, kwargs: dict) -> None:
     sig = inspect.signature(f)
     try:
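For reference, the removed helper re-quoted and re-escaped lexer output before it was displayed. A couple of illustrative calls (outputs inferred from the code above, not taken from the commit):

    escape_and_quote('abc')        # -> 'abc'            (no space or quote: returned unchanged)
    escape_and_quote('s1 " s2')    # -> '"s1 \\" s2"'    (inner quote escaped, result re-quoted)

With the new Lexer, quotes are never stripped from tokens in the first place, which appears to be why this helper can be dropped.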
@@ -62,13 +22,8 @@ def verify_arg_signature(f: typing.Callable, args: list, kwargs: dict) -> None:
         raise exceptions.CommandError("command argument mismatch: %s" % v.args[0])
 
 
-def lexer(s):
-    # mypy mis-identifies shlex.shlex as abstract
-    lex = shlex.shlex(s, posix=True)  # type: ignore
-    lex.wordchars += "."
-    lex.whitespace_split = True
-    lex.commenters = ''
-    return lex
+def get_lexer(s):
+    return lexer.Lexer(s)
 
 
 def typename(t: type) -> str:
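The shlex-based tokenizer is replaced here by the new hand-written Lexer. A minimal usage sketch of the replacement (illustrative input, not from the commit); note that, unlike the old posix-mode shlex configuration, quoted tokens come back with their quotes still attached:

    from mitmproxy import lexer

    lex = lexer.Lexer('set body "hello world"')
    print(list(lex))   # ['set', 'body', '"hello world"']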
@@ -199,7 +154,7 @@ class CommandManager(mitmproxy.types._CommandBase):
         """
         buf = io.StringIO(cmdstr)
         parts: typing.List[str] = []
-        lex = lexer(buf)
+        lex = get_lexer(buf)
         while 1:
             remainder = cmdstr[buf.tell():]
             try:
@@ -245,7 +200,7 @@ class CommandManager(mitmproxy.types._CommandBase):
             # ctx.log.info('[gilga] before parse.append. value = %s' % parts[i])
             parse.append(
                 ParseResult(
-                    value=escape_and_quote(parts[i]),
+                    value=parts[i],
                     type=typ,
                     valid=valid,
                 )

mitmproxy/lexer.py (new file, 154 additions)

@@ -0,0 +1,154 @@
from enum import Enum
import io
from typing import Union
import pdb


class State(Enum):
    QUOTE = 1
    ESCAPE = 2
    TEXT = 3


class Lexer:

    def __init__(self, text: Union[str, io.StringIO]):
        self._tokens = []
        self._count = 0
        self._parsed = False
        self._state = State.TEXT
        self._states = []
        self._text_pos = 0
        self._quote_start_pos = 0

        if isinstance(text, str):
            self.text = io.StringIO(text)
        else:
            self.text = text
    def __iter__(self):
        return self

    def __next__(self):
        t = self.get_token()
        if t == '':
            raise StopIteration
        return t
    def get_token(self):
        try:
            return self.parse()
        except ValueError as e:
            raise

        if len(self._tokens) > 0:
            ret = self._tokens[0]
            self._tokens = self._tokens[1:]
        else:
            ret = None

        return ret
    #def get_remainder(self):
    #    try:
    #        self.parse()
    #    except ValueError as e:
    #        return self.text
    #
    #    return ' '.join(self._tokens)
    def parse(self):
        acc = ''
        quote = ''  # used by the parser
        tokens = []
        self._state = State.TEXT
        text = self.text
        i = 0

        #self.text.seek(self._text_pos)

        while True:
            ch = self.text.read(1)
            self._text_pos += 1

            #pdb.set_trace()

            # If this is the last char of the string, let's save the token
            if ch == '' or ch is None:
                break

            if self._state == State.QUOTE:
                if ch == '\\':
                    self._states.append(self._state)
                    self._state = State.ESCAPE
                    acc += ch
                elif ch == quote:
                    self._state = self._states.pop()
                    acc += ch
                else:
                    acc += ch

            elif self._state == State.ESCAPE:
                acc += ch
                self._state = self._states.pop()

            elif self._state == State.TEXT:
                if ch == ' ':
                    if acc != '':
                        break
                elif ch == '"' or ch == "'":
                    quote = ch
                    self._quote_start_pos = self._text_pos
                    self._states.append(self._state)
                    self._state = State.QUOTE
                    acc += ch
                elif ch == '\\':
                    # TODO: Does it make sense to go to State.ESCAPE from State.TEXT?
                    self._states.append(self._state)
                    self._state = State.ESCAPE
                    acc += ch
                else:
                    acc += ch
            else:
                print("This shouldn't have happened")
                exit(-1)

        self._token = acc

        if self._state == State.QUOTE:
            raise ValueError("No closing quotation for quote in position %d" % self._quote_start_pos)

        return self._token
if __name__ == '__main__':

    cases = []
    cases.append(r'abc')
    cases.append(r'Hello World')
    cases.append(r'"Hello \" World"')
    cases.append(r"'Hello \' World'")
    cases.append(r'"\""')
    cases.append(r'abc "def\" \x bla \z \\ \e \ " xpto')
    cases.append(r'')
    cases.append(r' ')
    cases.append(r'  ')
    cases.append(r'   ')
    cases.append(r'    ')
    cases.append(r'Hello World ')

    for s in cases:
        lex = Lexer(s)
        tokens = list(lex)
        if len(tokens) == 1:
            print('%s = %d token' % (str(tokens), len(tokens)))
        else:
            print('%s = %d tokens' % (str(tokens), len(tokens)))
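As a quick sanity check, here is how the new lexer treats a quoted token containing an escaped quote (doctest-style illustration; behaviour inferred from the parse() state machine above, not output captured from the commit):

    >>> lex = Lexer(r'abc "def \" ghi"')
    >>> lex.get_token()
    'abc'
    >>> lex.get_token()   # quotes and the escaping backslash are preserved
    '"def \\" ghi"'
    >>> lex.get_token()   # end of input
    ''

An unterminated quote raises ValueError ("No closing quotation for quote in position %d"), which get_token re-raises and __next__ does not catch, so iterating over malformed input propagates the error to the caller.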