# 2010-02-16 04:09:07 +00:00
# module pyparsing.py
#
# Copyright (c) 2003-2009 Paul T. McGuire
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#from __future__ import generators
__doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars

The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.

Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word( alphas ) + "," + Word( alphas ) + "!"

    hello = "Hello, World!"
    print hello, "->", greet.parseString( hello )

The program outputs the following::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.

The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
 - quoted strings
 - embedded comments
"""
# Release metadata.  The literals carry no surrounding whitespace so that
# version comparisons and display strings are exact.
__version__ = "1.5.2"
__versionTime__ = "17 February 2009 19:45"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
from weakref import ref as wkref
import copy
import sys
import warnings
import re
import sre_constants
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
# Public API of the module.  Every entry must be the exact attribute name:
# a name padded with whitespace is not an attribute of the module and makes
# "from pyparsing import *" fail.
__all__ = [
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
    'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
    'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
    'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
    'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
    'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
    'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
    'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
    'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
    'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
    'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
    'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
    'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
    'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock', 'originalTextFor',
]
"""
Detect if we are running version 3.X and make appropriate changes
Robert A. Clark
"""
# Interpreter detection: _PY3K is True on Python 3 and later.  _MAX_INT is the
# largest native integer for the running interpreter, and on Python 3 the
# missing "basestring" name is aliased to str.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    _MAX_INT = sys.maxsize
    basestring = str
else:
    _MAX_INT = sys.maxint
if _PY3K:
    # Python 3 strings are already Unicode, so the builtins serve directly.
    _ustr = str
    unichr = chr
else:
    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
           str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
           then < returns the unicode object | encodes it with the default encoding | ... >.
        """
        # unicode objects pass straight through untouched
        if isinstance(obj, unicode):
            return obj

        try:
            # When this succeeds _ustr(obj) behaves exactly like str(obj),
            # so existing callers are unaffected.
            converted = str(obj)
        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # state that "The return value must be a string object".  A unicode
            # object is a subclass of basestring, so it is returned as-is here
            # rather than being encoded with one of the many possible policies
            # ('backslashreplace', 'replace', ...).
            converted = unicode(obj)
        return converted
if _PY3K:
    # a set gives the same fast membership test on Python 3
    _str2dict = set
else:
    def _str2dict(strg):
        """Return a dict keyed by each character of strg (all values 0), for fast membership tests."""
        return dict.fromkeys(strg, 0)
def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data.

    Returns a copy of data in which each of the five XML special characters
    is replaced by its named entity (&amp;, &gt;, &lt;, &quot;, &apos;).
    """
    # The ampersand must be replaced first, otherwise the '&' introduced by
    # the other entities would itself be escaped a second time.
    from_symbols = '&><"\''
    to_symbols = ['&' + s + ';' for s in "amp gt lt quot apos".split()]
    for from_, to_ in zip(from_symbols, to_symbols):
        data = data.replace(from_, to_)
    return data
class _Constants(object):
    """Empty attribute container; instances are plain namespaces that hold named constants."""
# Character-set constants used to build Word and similar expressions.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)  # a single backslash character
# every printable character except whitespace, joined with no separator
printables = "".join([c for c in string.printable if c not in string.whitespace])
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """Record where and why parsing failed.

        When only one positional argument is given it is taken as the
        message and the parsed string is recorded as empty.
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__(self, aname):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # These are computed lazily from loc/pstr so that constructing the
        # exception stays cheap.
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        elif aname in ("col", "column"):
            return col(self.loc, self.pstr)
        elif aname == "line":
            return line(self.loc, self.pstr)
        else:
            raise AttributeError(aname)

    def __str__(self):
        return "%s (at char %d), (line:%d, col:%d)" % \
                (self.msg, self.loc, self.lineno, self.column)

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join([line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # List the computed attributes too; the method is spelled
        # markInputline (lower-case "l"), matching its definition above.
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
class ParseSyntaxException(ParseFatalException):
    """just like ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # Re-raise the details of the wrapped exception under this type.
        base = super(ParseSyntaxException, self)
        base.__init__(pe.pstr, pe.loc, pe.msg, pe.parserElement)
#~ class ReparseException(ParseBaseException):
#~ """Experimental class - parse actions can raise this exception to cause
#~ pyparsing to reparse the input string:
#~ - with a modified input string, and/or
#~ - with a modified start location
#~ Set the values of the ReparseException in the constructor, and raise the
#~ exception in a parse action to cause pyparsing to use the new string/location.
#~ Setting the values as None causes no change to be made.
#~ """
#~ def __init_( self, newstring, restartLoc ):
#~ self.newParseText = newstring
#~ self.reparseLoc = restartLoc
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__(self, parseElementList):
        # the chain of parse elements that led back to the starting element
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple so that a tuple-valued trace is shown
        # whole instead of being unpacked as separate format arguments.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
class _ParseResultsWithOffset(object):
    """Pair of (value, offset) that supports indexing and tuple-style unpacking."""
    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self, i):
        # keep the stored value, replace only the offset
        self.tup = self.tup[:1] + (i,)
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)
       """
    # Slot names must be bare identifiers; the leading double underscore makes
    # them private (name-mangled) to this class.
    __slots__ = ("__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__")

    def __new__(cls, toklist, name=None, asList=True, modal=True):
        # Wrapping an existing ParseResults returns that same object;
        # __doinit tells __init__ whether the storage still needs creating.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, toklist, name=None, asList=True, modal=True):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of the last
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = _ustr(name)  # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None, '', []):
                if isinstance(toklist, basestring):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        self[name] = toklist

    def __getitem__(self, i):
        # ints and slices index the token list; anything else is a results name
        if isinstance(i, (int, slice)):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([v[0] for v in self.__tokdict[i]])

    def __setitem__(self, k, v):
        if isinstance(v, _ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
            sub = v
        if isinstance(sub, ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__(self, i):
        if isinstance(i, (int, slice)):
            mylen = len(self.__toklist)
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i + 1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__(self, k):
        return k in self.__tokdict

    def __len__(self): return len(self.__toklist)
    def __bool__(self): return len(self.__toklist) > 0
    __nonzero__ = __bool__
    def __iter__(self): return iter(self.__toklist)
    def __reversed__(self): return iter(reversed(self.__toklist))

    def keys(self):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop(self, index=-1):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert(self, index, insStr):
        """Inserts insStr into the token list at index, shifting the recorded
           positions of named results that lie after it."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items(self):
        """Returns all named result keys and values as a list of tuples."""
        return [(k, self[k]) for k in self.__tokdict]

    def values(self):
        """Returns all named result values."""
        return [v[-1][0] for v in self.__tokdict.values()]

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails: known results
        # names resolve like results[name], unknown names give "".
        if name not in self.__slots__:
            if name in self.__tokdict:
                return self[name]
            else:
                return ""
        return None

    def __add__(self, other):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other):
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset(a):
                # A negative position is pinned to the join point.  This is an
                # explicit branch because "cond and offset or ..." goes wrong
                # whenever offset is 0.
                if a < 0:
                    return offset
                return a + offset

            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                              for (k, vlist) in otheritems for v in vlist]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update(other.__accumNames)
        return self

    def __repr__(self):
        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))

    def __str__(self):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList(self, sep=''):
        # Flatten nested results into one list of strings, with sep placed
        # between the top-level items when sep is non-empty.
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(_ustr(item))
        return out

    def asList(self):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; tokens are returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res, ParseResults):
                out.append(res.asList())
            else:
                out.append(res)
        return out

    def asDict(self):
        """Returns the named parse results as dictionary."""
        return dict(self.items())

    def copy(self):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults(self.__toklist)
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update(self.__accumNames)
        ret.__name = self.__name
        return ret

    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict([(v[1], k) for (k, vlist) in self.__tokdict.items()
                           for v in vlist])
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [nl, indent, "<", selfTag, ">"]

        worklist = self.__toklist
        for i, res in enumerate(worklist):
            if isinstance(res, ParseResults):
                if i in namedItems:
                    out += [res.asXML(namedItems[i],
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
                else:
                    out += [res.asXML(None,
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [nl, nextLevelIndent, "<", resTag, ">",
                        xmlBodyText,
                        "</", resTag, ">"]

        out += [nl, indent, "</", selfTag, ">"]
        return "".join(out)

    def __lookup(self, sub):
        # find the results name under which the object sub is stored
        for k, vlist in self.__tokdict.items():
            for v, loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
              len(self.__tokdict) == 1 and
              # dict views cannot be indexed on Python 3, so go through a list
              list(self.__tokdict.values())[0][0][1] in (0, -1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self, indent='', depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append(indent + _ustr(self.asList()))
        for k, v in sorted(self.items()):
            out.append('\n')
            out.append("%s%s- %s: " % (indent, ('  ' * depth), k))
            if isinstance(v, ParseResults):
                if v.keys():
                    out.append(v.dump(indent, depth + 1))
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return (self.__toklist,
                (self.__tokdict.copy(),
                 self.__parent is not None and self.__parent() or None,
                 self.__accumNames,
                 self.__name))

    def __setstate__(self, state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None
        # a restored object is fully initialised; never re-run __init__ setup
        self.__doinit = False

    def __dir__(self):
        # dir() of the class lists the real methods (dir() of a super proxy
        # does not), and keys() is a view on Python 3, so it is copied into a
        # list before concatenating.
        return dir(type(self)) + list(self.keys())
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location that sits on a newline character reports column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when there is no earlier newline, which makes the
    # first character of the string column 1
    return loc - strg.rfind("\n", 0, loc)
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line per newline found before loc, plus the first line
    return strg.count("\n", 0, loc) + 1
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 when there is no later newline; index 0 is a valid
    # hit (the string starts with a newline and loc is 0), so the test must
    # be ">= 0" and not "> 0".
    if nextCR >= 0:
        return strg[lastCR + 1:nextCR]
    else:
        return strg[lastCR + 1:]
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug action run before a match attempt: prints the expression
    and the location, followed by (line,column)."""
    print("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % (lineno(loc, instring), col(loc, instring)))
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug action run after a successful match: prints the expression
    and the matched tokens as a list."""
    print("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug action run when a match attempt raises: prints the exception."""
    print("Exception raised:" + _ustr(exc))
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
class ParserElement ( object ) :
""" Abstract base level parser element class. """
DEFAULT_WHITE_CHARS = " \n \t \r "
def setDefaultWhitespaceChars(chars):
    """Overrides the default whitespace chars.

    The new value is stored on the ParserElement class; __init__ and copy()
    read it from there.
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
def __init__(self, savelist=False):
    """Set every per-element parsing option to its default value."""
    self.parseAction = list()
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.mayReturnEmpty = False  # used when checking for left-recursion
    self.keepTabs = False
    self.ignoreExprs = list()
    self.debug = False
    self.streamlined = False
    self.mayIndexError = True  # used to optimize exception handling for subclasses that don't advance parse index
    self.errmsg = ""  # empty until setName() or a subclass provides a message
    self.modalResults = True  # used to mark results names as modal (report only last) or cumulative (list all)
    self.debugActions = (None, None, None)  # custom debug actions
    self.re = None
    self.callPreparse = True  # used to avoid redundant calls to preParse
    self.callDuringTry = False
def copy(self):
    """Make a copy of this ParserElement.  Useful for defining different parse actions
       for the same parsing pattern, using copies of the original parse element."""
    duplicate = copy.copy(self)
    # give the copy its own lists so later additions do not leak back
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # pick up the class-wide whitespace default as it is right now
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
def setName(self, name):
    """Define name for this expression, for use in debugging.

    Also rebuilds the "Expected <name>" error message and, when this element
    carries a pre-built exception object, updates its message to match.
    Returns self so that calls can be chained.
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    if hasattr(self, "exception"):
        self.exception.msg = self.errmsg
    return self
def setResultsName(self, name, listAllMatches=False):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.
       NOTE: this returns a *copy* of the original ParserElement object;
       this is so that the client can define a basic element, such as an
       integer, and reference it in multiple places with different names.
    """
    named = self.copy()
    named.resultsName = name
    # modal results keep only the last match; listAllMatches turns that off
    named.modalResults = not listAllMatches
    return named
def setBreak(self, breakFlag=True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set breakFlag to True to enable, False to
       disable.
    """
    if breakFlag:
        # Wrap only once: nesting wrappers would stop in pdb several times
        # per parse and a single disable call could not undo them all.
        if not hasattr(self._parse, "_originalParseMethod"):
            _parseMethod = self._parse
            def breaker(instring, loc, doActions=True, callPreParse=True):
                import pdb
                pdb.set_trace()
                return _parseMethod(instring, loc, doActions, callPreParse)
            # remember the wrapped method so that it can be restored later
            breaker._originalParseMethod = _parseMethod
            self._parse = breaker
    else:
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
    return self
def _normalizeParseActionArgs(f):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       f may be a function, a bound method, a class (its __init__ signature is
       inspected) or an object with a __call__ method.  Callables that accept
       *args, or three or more positional arguments, are returned unchanged;
       anything else is wrapped.  Raises AttributeError when no Python code
       object can be found for f (for example a builtin).
    """
    STAR_ARGS = 4  # co_flags bit that is set when the callable accepts *args

    def _code_of(func):
        # code object under its Python 3 name first, then the Python 2 name
        code = getattr(func, "__code__", None)
        if code is None:
            code = getattr(func, "func_code", None)
        return code

    def _has_implicit_arg(func):
        # methods carry im_self (Python 2) or __self__ (Python 3); their
        # first declared parameter is filled in automatically
        return hasattr(func, "im_self") or hasattr(func, "__self__")

    if isinstance(f, type):
        # a class used as a parse action is called like its __init__,
        # minus the instance argument
        codeObj = _code_of(f.__init__)
        implicit = 1
    else:
        codeObj = _code_of(f)
        implicit = 0
        if codeObj is not None:
            if _has_implicit_arg(f):
                implicit = 1
        else:
            # not a function or method, must be a callable object: get the
            # signature from its __call__ method
            call = f.__call__
            codeObj = _code_of(call)
            if _has_implicit_arg(call):
                implicit = 1

    if codeObj is None:
        raise AttributeError("cannot determine the argument count of parse action %r" % (f,))

    if codeObj.co_flags & STAR_ARGS:
        return f

    numargs = codeObj.co_argcount - implicit
    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs >= 3:
        # already callable as f(s,l,t); any further parameters must have defaults
        return f

    if numargs == 2:
        def tmp(s, l, t):
            return f(l, t)
    elif numargs == 1:
        def tmp(s, l, t):
            return f(t)
    else:  #~ numargs == 0:
        def tmp(s, l, t):
            return f()

    # make the wrapper look like the wrapped callable where possible
    for attr in ("__name__", "__doc__"):
        try:
            setattr(tmp, attr, getattr(f, attr))
        except (AttributeError, TypeError):
            # no need for special handling if attribute doesnt exist
            pass
    try:
        tmp.__dict__.update(f.__dict__)
    except (AttributeError, TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
def setParseAction(self, *fns, **kwargs):
    """Define action to perform when successfully matching parse element definition.
       Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
       fn(loc,toks), fn(toks), or just fn(), where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       Any previously defined parse actions are replaced.  The optional keyword
       argument callDuringTry (default False) is stored on the element.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
def addParseAction(self, *fns, **kwargs):
    """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.

       Existing actions are kept.  callDuringTry stays enabled once it has been
       enabled by this or any earlier call.
       """
    self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction(self, fn):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       fn(s,loc,expr,err) where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw ParseFatalException
       if it is desired to stop parsing immediately."""
    setattr(self, "failAction", fn)
    return self
def _skipIgnorables(self, instring, loc):
    """Advance loc past every leading match of the expressions in
    self.ignoreExprs, repeating until a full pass matches nothing."""
    matchedAny = True
    while matchedAny:
        matchedAny = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this expression as
                # possible; the loop ends when _parse raises ParseException
                while True:
                    loc, dummy = ignorable._parse(instring, loc)
                    matchedAny = True
            except ParseException:
                pass
    return loc
def preParse(self, instring, loc):
    """Return loc advanced past any ignorable expressions and then past any
    leading whitespace characters, as configured on this element."""
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing, returning loc unchanged and an empty token list."""
    tokens = []
    return loc, tokens
def postParse(self, instring, loc, tokenlist):
    """Base implementation of the post-match hook: hands tokenlist back unchanged."""
    result = tokenlist
    return result
#~ @profile
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Match this expression at loc without consulting the packrat cache.

    Steps, in order:
     1. call the start debug action (when debugging or a fail action is set)
     2. optionally run preParse to find the real starting location
     3. run parseImpl, converting IndexError into ParseException
     4. run postParse and wrap the tokens in a ParseResults
     5. run the parse actions (when doActions or callDuringTry is true);
        a non-None return value replaces the current results
     6. call the success debug action (when debugging)

    Returns a (loc, ParseResults) pair.  On a ParseBaseException raised by
    parseImpl, the exception debug action and the fail action are called
    (when present) before the exception is re-raised.
    """
    debugging = self.debug
    watching = debugging or self.failAction

    if watching and self.debugActions[0]:
        self.debugActions[0](instring, loc, self)

    if callPreParse and self.callPreparse:
        preloc = self.preParse(instring, loc)
    else:
        preloc = loc
    # parse actions and debug hooks are given the location before pre-parsing
    tokensStart = loc

    if watching:
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    elif self.mayIndexError or loc >= len(instring):
        # only pay for the IndexError guard when running off the end is possible
        try:
            loc, tokens = self.parseImpl(instring, preloc, doActions)
        except IndexError:
            raise ParseException(instring, len(instring), self.errmsg, self)
    else:
        loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)
    retTokens = ParseResults(tokens, self.resultsName,
                             asList=self.saveAsList, modal=self.modalResults)

    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for action in self.parseAction:
                replaced = action(instring, tokensStart, retTokens)
                if replaced is not None:
                    retTokens = ParseResults(
                        replaced,
                        self.resultsName,
                        asList=self.saveAsList and isinstance(replaced, (ParseResults, list)),
                        modal=self.modalResults)
        except ParseBaseException as err:
            # the exception debug action is only reported while debugging
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
def tryParse(self, instring, loc):
    """Attempt a match at loc without running parse actions.

    Returns the location following the match.  A ParseFatalException from
    the attempt is converted into an ordinary ParseException at loc.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
    return outcome[0]
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache ( self , instring , loc , doActions = True , callPreParse = True ) :
lookup = ( self , instring , loc , callPreParse , doActions )
if lookup in ParserElement . _exprArgCache :
value = ParserElement . _exprArgCache [ lookup ]
if isinstance ( value , Exception ) :
raise value
return value
else :
try :
value = self . _parseNoCache ( instring , loc , doActions , callPreParse )
ParserElement . _exprArgCache [ lookup ] = ( value [ 0 ] , value [ 1 ] . copy ( ) )
return value
except ParseBaseException , pe :
ParserElement . _exprArgCache [ lookup ] = pe
raise
# _parse is the entry point used for every match attempt; it starts out bound
# to the uncached implementation (enablePackrat rebinds it to _parseCache).
_parse = _parseNoCache
# argument cache for optimizing repeated calls when backtracking through recursive expressions
# (keyed by the lookup tuple built in _parseCache; emptied by resetCache)
_exprArgCache = { }
def resetCache():
    """Discard every entry held in the shared ParserElement._exprArgCache."""
    ParserElement._exprArgCache.clear()
# exposed as a static method: callable as ParserElement.resetCache()
resetCache = staticmethod(resetCache)
# set to True by the first call to enablePackrat()
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, which memoizes parse attempts.

    Repeated parse attempts at the same string location (common in
    complex grammars) then return a cached value instead of re-running
    the parsing/validating code.  Both successful results and parsing
    exceptions are memoized.

    This speedup may break programs whose parse actions have side
    effects, which is why packrat parsing is disabled when pyparsing is
    first imported.  To activate it, call ParserElement.enablePackrat().
    If your program uses psyco to "compile as you go", you must call
    enablePackrat before calling psyco.full(); otherwise Python will
    crash.  For best results, call enablePackrat() immediately after
    importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    # route every parse attempt through the memoizing implementation
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
def parseString(self, instring, parseAll=False):
    """Run this parse expression against instring, starting at location 0.

    This is the main interface for client code once the complete
    expression has been built.  Returns the matched tokens.

    Set parseAll to True to require that the entire input string be
    matched (equivalent to ending the grammar with StringEnd()).

    Note: unless keepTabs is set, expandtabs() is called on the input
    string so that column numbers reported to parse actions are correct.
    If the input contains tabs and the grammar's parse actions use the
    loc argument to index into the string being parsed, keep a
    consistent view of the input by doing one of the following:
     - call parseWithTabs on the grammar before calling parseString
     - define parse actions with the full (s, loc, toks) signature and
       index into the s argument rather than your own copy of the input
     - explicitly expand the tabs in the input before calling parseString
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            loc = self.preParse(instring, loc)
            StringEnd()._parse(instring, loc)
    except ParseBaseException as exc:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
def scanString(self, instring, maxMatches=_MAX_INT):
    """Scan instring for matches of this expression, yielding each one.

    Generator.  Every match is yielded as a (tokens, start, end) tuple.
    The optional maxMatches argument stops the scan after that many
    matches have been found.

    Start and end locations are relative to the string actually parsed,
    which has its tabs expanded unless keepTabs is set; see parseString
    for more on parsing strings with embedded tabs.
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            # bound before the try so the except branch can always use it
            preloc = loc
            try:
                preloc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                loc = preloc + 1
            else:
                matches += 1
                yield tokens, preloc, nextLoc
                if nextLoc > loc:
                    loc = nextLoc
                else:
                    # A zero-width match that consumed nothing would be
                    # found again at the same spot forever; step past it.
                    loc = preloc + 1
    except ParseBaseException as pe:
        raise pe
def transformString(self, instring):
    """Return instring with every match replaced by its (modified) tokens.

    Extension to scanString: define a grammar, attach a parse action that
    modifies the returned token list, then call transformString() on a
    target string.  Text between matches is copied through unchanged;
    each matched span is replaced by the string form of its tokens.

    Side effect: sets self.keepTabs to True.
    """
    out = []
    lastE = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for t, s, e in self.scanString(instring):
            # untouched text between the previous match and this one
            out.append(instring[lastE:s])
            if t:
                if isinstance(t, ParseResults):
                    out += t.asList()
                elif isinstance(t, list):
                    out += t
                else:
                    out.append(t)
            lastE = e
        out.append(instring[lastE:])
        # Join with the EMPTY string: any separator here would inject extra
        # characters between every fragment and corrupt the unmatched text.
        return "".join(map(_ustr, out))
    except ParseBaseException as pe:
        raise pe
def searchString(self, instring, maxMatches=_MAX_INT):
    """Collect the tokens of every match that scanString finds in instring.

    Another extension to scanString, for callers that only want the
    matched tokens and not their locations.  The optional maxMatches
    argument stops the search after that many matches.

    Returns a ParseResults holding one entry per match.
    """
    try:
        found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as pe:
        raise pe
def __add__(self, other):
    """Implementation of + operator - returns And([self, other]).

    A string operand is converted to a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __radd__(self, other):
    """Implementation of + operator when the left operand is not a ParserElement.

    A string operand is converted to a Literal and the addition is then
    delegated to it.  Any other non-ParserElement operand produces a
    SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __sub__(self, other):
    """Implementation of - operator - returns And with an error stop.

    The result is And([self, And._ErrorStop(), other]).  A string operand
    is converted to a Literal.  Any other non-ParserElement operand
    produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __rsub__(self, other):
    """Implementation of - operator when the left operand is not a ParserElement.

    A string operand is converted to a Literal and the subtraction is then
    delegated to it.  Any other non-ParserElement operand produces a
    SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __mul__ ( self , other ) :
if isinstance ( other , int ) :
minElements , optElements = other , 0
elif isinstance ( other , tuple ) :
other = ( other + ( None , None ) ) [ : 2 ]
if other [ 0 ] is None :
other = ( 0 , other [ 1 ] )
if isinstance ( other [ 0 ] , int ) and other [ 1 ] is None :
if other [ 0 ] == 0 :
return ZeroOrMore ( self )
if other [ 0 ] == 1 :
return OneOrMore ( self )
else :
return self * other [ 0 ] + ZeroOrMore ( self )
elif isinstance ( other [ 0 ] , int ) and isinstance ( other [ 1 ] , int ) :
minElements , optElements = other
optElements - = minElements
else :
raise TypeError ( " cannot multiply ' ParserElement ' and ( ' %s ' , ' %s ' ) objects " , type ( other [ 0 ] ) , type ( other [ 1 ] ) )
else :
raise TypeError ( " cannot multiply ' ParserElement ' and ' %s ' objects " , type ( other ) )
if minElements < 0 :
raise ValueError ( " cannot multiply ParserElement by negative value " )
if optElements < 0 :
raise ValueError ( " second tuple value must be greater or equal to first tuple value " )
if minElements == optElements == 0 :
raise ValueError ( " cannot multiply ParserElement by 0 or (0,0) " )
if ( optElements ) :
def makeOptionalList ( n ) :
if n > 1 :
return Optional ( self + makeOptionalList ( n - 1 ) )
else :
return Optional ( self )
if minElements :
if minElements == 1 :
ret = self + makeOptionalList ( optElements )
else :
ret = And ( [ self ] * minElements ) + makeOptionalList ( optElements )
else :
ret = makeOptionalList ( optElements )
else :
if minElements == 1 :
ret = self
else :
ret = And ( [ self ] * minElements )
return ret
def __rmul__ ( self , other ) :
return self . __mul__ ( other )
def __or__(self, other):
    """Implementation of | operator - returns MatchFirst([self, other]).

    A string operand is converted to a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __ror__(self, other):
    """Implementation of | operator when the left operand is not a ParserElement.

    A string operand is converted to a Literal and the operation is then
    delegated to it.  Any other non-ParserElement operand produces a
    SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __xor__(self, other):
    """Implementation of ^ operator - returns Or([self, other]).

    A string operand is converted to a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __rxor__(self, other):
    """Implementation of ^ operator when the left operand is not a ParserElement.

    A string operand is converted to a Literal and the operation is then
    delegated to it.  Any other non-ParserElement operand produces a
    SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __and__(self, other):
    """Implementation of & operator - returns Each([self, other]).

    A string operand is converted to a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __rand__(self, other):
    """Implementation of & operator when the left operand is not a ParserElement.

    A string operand is converted to a Literal and the operation is then
    delegated to it.  Any other non-ParserElement operand produces a
    SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn(" Cannot combine element of type %s with ParserElement " % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __invert__(self):
    """Implementation of ~ operator - wraps this expression in a NotAny."""
    negated = NotAny(self)
    return negated
def __call__ ( self , name ) :
""" Shortcut for setResultsName, with listAllMatches=default::
userdata = Word ( alphas ) . setResultsName ( " name " ) + Word ( nums + " - " ) . setResultsName ( " socsecno " )
could be written as : :
userdata = Word ( alphas ) ( " name " ) + Word ( nums + " - " ) ( " socsecno " )
"""
return self . setResultsName ( name )
def suppress(self):
    """Return this expression wrapped in a Suppress.

    Useful to keep punctuation from cluttering up the returned output.
    """
    hidden = Suppress(self)
    return hidden
def leaveWhitespace(self):
    """Turn off the skipping of whitespace before this expression is matched.

    Normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.

    Returns self so that calls can be chained.
    """
    self.skipWhitespace = False
    return self
def setWhitespaceChars(self, chars):
    """Override the default whitespace characters for this expression.

    Whitespace skipping is switched on, chars becomes the set of
    characters skipped, and copyDefaultWhiteChars is cleared.

    Returns self so that calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
def parseWithTabs(self):
    """Keep <TAB> characters in the input instead of expanding them to spaces.

    Overrides the default behavior of expanding tabs before parsing.
    Must be called before parseString when the input grammar contains
    elements that match <TAB> characters.

    Returns self so that calls can be chained.
    """
    self.keepTabs = True
    return self
def ignore(self, other):
    """Add an expression to be ignored (e.g., comments) during matching.

    May be called repeatedly to define multiple comment or other
    ignorable patterns.  A Suppress instance is added only if it is not
    already in the list; anything else is wrapped in a new Suppress and
    appended.

    Returns self so that calls can be chained.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Install custom debug callbacks and enable debugging for this expression.

    Any argument that is false (e.g. None) is replaced by the
    corresponding module default action.

    Returns self so that calls can be chained.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
def setDebug(self, flag=True):
    """Switch debugging messages for this expression on or off.

    A true flag installs the default debug actions (which also enables
    debugging); a false flag just clears self.debug.

    Returns self so that calls can be chained.
    """
    if not flag:
        self.debug = False
    else:
        self.setDebugActions(_defaultStartDebugAction,
                             _defaultSuccessDebugAction,
                             _defaultExceptionDebugAction)
    return self
def __str__ ( self ) :
return self . name
def __repr__(self):
    """Return the same text as the string conversion performed by _ustr."""
    text = _ustr(self)
    return text
def streamline(self):
    """Mark this expression as streamlined and drop its cached string form.

    Returns self so that calls can be chained.
    """
    self.strRepr = None
    self.streamlined = True
    return self
def checkRecursion(self, parseElementList):
    """Recursion-check hook; this base implementation does nothing."""
    return None
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure and infinite recursion.

    This implementation starts a recursion check with a fresh empty list;
    the validateTrace argument is not read or modified here.
    """
    fresh_trace = []
    self.checkRecursion(fresh_trace)
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

    Parameters:
     - file_or_filename - an object with a read() method, or a filename.
       When a filename is given, the file is opened in binary mode, read
       in full, and closed before parsing.
     - parseAll - passed through to parseString

    Returns whatever parseString returns for the file's contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close even if read() fails, so the handle is never leaked
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
def getException(self):
    """Build a ParseException at location 0 carrying this expression's errmsg."""
    exc = ParseException(" ", 0, self.errmsg, self)
    return exc
def __getattr__ ( self , aname ) :
if aname == " myException " :
self . myException = ret = self . getException ( ) ;
return ret ;
else :
raise AttributeError ( " no such attribute " + aname )
def __eq__(self, other):
    """Compare this expression with another expression or with a string.

     - another ParserElement: equal when it is the same object or when
       both instance dictionaries compare equal
     - a string: equal when this expression parses the whole string
     - anything else: the result of comparing the super() proxy with other
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
def __ne__ ( self , other ) :
return not ( self == other )
def __hash__ ( self ) :
return hash ( id ( self ) )
def __req__ ( self , other ) :
return self == other
def __rne__ ( self , other ) :
return not ( self == other )
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name via the base class, then rebuild errmsg from the new name.

        Returns whatever the base class setName returns.
        """
        renamed = super(Token, self).setName(name)
        self.errmsg = " Expected " + self.name
        return renamed
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = " Empty "
        # flags read by the parsing machinery
        self.mayIndexError = False
        self.mayReturnEmpty = True
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = " NoMatch "
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = " Unmatchable token "

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise this element's exception, updated for loc and instring."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty match string: warn, and turn this instance into an Empty
            warnings.warn(" null string passed to Literal; use Empty() instead ",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = ' " %s " ' % _ustr(self.match)
        self.errmsg = " Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Match self.match at loc.

        Returns (location after the match, the match string); otherwise
        raises this element's exception, updated for loc and instring.
        """
        if instring[loc] != self.firstMatchChar or not (
                self.matchLen == 1 or instring.startswith(self.match, loc)):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc + self.matchLen, self.match

# short module-level alias for Literal
_L = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with Literal::
        Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
        Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
    Accepts two optional constructor arguments in addition to the keyword string:
    identChars is a string of characters that would be valid identifier characters,
    defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
    matching, default is False.
    """
    # Must contain exactly the alphanumerics plus "_" and "$".  A blank in this
    # set would make a following space count as an identifier character, so
    # Keyword("if") could never match "if x".
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn(" null string passed to Keyword; use Empty() instead ",
                          SyntaxWarning, stacklevel=2)
        self.name = ' " %s " ' % self.match
        self.errmsg = " Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case when matching caselessly
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc, requiring non-identifier characters on both sides.

        Returns (location after the match, the keyword string); otherwise
        raises this element's exception, updated for loc and instring.
        """
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch) and
                    (loc >= len(instring) - self.matchLen or
                     instring[loc + self.matchLen].upper() not in self.identChars) and
                    (loc == 0 or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                    (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                    (loc >= len(instring) - self.matchLen or
                     instring[loc + self.matchLen] not in self.identChars) and
                    (loc == 0 or instring[loc - 1] not in self.identChars)):
                return loc + self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy whose identChars is reset to the current class default."""
        c = super(Keyword, self).copy()
        # NOTE(review): this discards any custom identChars given to the
        # constructor and stores the raw default string rather than the
        # _str2dict form - confirm this is intended.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the base class stores the upper-cased form as self.match
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = " ' %s ' " % self.returnString
        self.errmsg = " Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Match at loc by comparing the upper-cased input slice with self.match.

        Returns (location after the match, the original defining string);
        otherwise raises this element's exception, updated for loc and instring.
        """
        candidate = instring[loc:loc + self.matchLen]
        if candidate.upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc + self.matchLen, self.returnString
class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True."""

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc, ignoring case.

        The character following the match (if any) must not be an
        identifier character.  Returns (location after the match, the
        keyword string); otherwise raises this element's exception,
        updated for loc and instring.
        """
        end = loc + self.matchLen
        textMatches = instring[loc:end].upper() == self.caselessmatch
        if textMatches and (loc >= len(instring) - self.matchLen or
                            instring[end].upper() not in self.identChars):
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class Word(Token):
    """Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for min is 1 (a
    minimum value < 1 is not valid); the default values for max and exact
    are 0, meaning no maximum or exact length restriction.
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False):
        super(Word, self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(" cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted ")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = " Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no blank in either
        # character set, the whole word can be matched by one regex.
        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
            # The templates must not contain blanks: a blank outside the
            # class is a literal space to match, and one inside adds the
            # space to the allowed characters.
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character can be emitted as an escaped
                # literal.  The test has to look at initCharsOrig: emitting
                # several initial characters as a literal would demand that
                # they all appear in sequence.
                self.reString = "%s[%s]*" % \
                    (re.escape(self.initCharsOrig),
                     _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                    (_escapeRegexRangeChars(self.initCharsOrig),
                     _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Match a word at loc.

        Uses the precompiled regex when there is one, otherwise scans
        character by character and enforces the length and keyword rules.
        Returns (location after the word, the matched text); otherwise
        raises this element's exception, updated for loc and instring.
        """
        if self.re:
            result = self.re.match(instring, loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if not (instring[loc] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past the limit is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # keyword mode: no body character may touch either end of the word
            if (start > 0 and instring[start - 1] in bodychars) or (loc < instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__(self):
        """Return the assigned name, or a generated "W:(...)" description."""
        try:
            return super(Word, self).__str__()
        except Exception:
            # no name available yet; fall through to the generated form
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s) > 4:
                    return s[:4] + " ... "
                else:
                    return s

            if (self.initCharsOrig != self.bodyCharsOrig):
                self.strRepr = " W:( %s , %s ) " % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
            else:
                self.strRepr = " W:( %s ) " % charsAsStr(self.initCharsOrig)

        return self.strRepr
class Regex(Token):
    """Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """

    def __init__(self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn(" null string passed to Regex; use Empty() instead ",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
            self.re = compiled
            self.reString = self.pattern
        except sre_constants.error:
            # warn about the bad pattern, then let the original error propagate
            warnings.warn(" invalid pattern ( %s ) passed to Regex " % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = " Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the compiled pattern at loc.

        Returns (location after the match, ParseResults holding the matched
        text); every named group of the pattern is also stored in the
        results under its group name.  Raises this element's exception,
        updated for loc and instring, when the pattern does not match.
        """
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        loc = found.end()
        named = found.groupdict()
        ret = ParseResults(found.group())
        for key in named:
            ret[key] = named[key]
        return loc, ret

    def __str__(self):
        """Return the assigned name, or a generated "Re:(...)" description."""
        try:
            return super(Regex, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = " Re:( %s ) " % repr(self.pattern)

        return self.strRepr
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters."""
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
        Defined with the following parameters:
         - quoteChar - string of one or more characters defining the quote delimiting string
         - escChar - character to escape quotes, typically backslash (default=None)
         - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
         - multiline - boolean indicating whether quotes can span multiple lines (default=False)
         - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
         - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

        Raises SyntaxError (after a SyntaxWarning) when quoteChar or
        endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Body characters: anything except the first end-quote character and
        # the escape character (and, for single-line strings, CR/LF).
        openQuote = re.escape(self.quoteChar)
        endFirstChar = _escapeRegexRangeChars(self.endQuoteChar[0])
        if escChar is not None:
            escInRange = _escapeRegexRangeChars(escChar)
        else:
            escInRange = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % (openQuote, endFirstChar, escInRange)
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % (openQuote, endFirstChar, escInRange)

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the next character breaks the end quote
            partialEnds = ["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                        _escapeRegexRangeChars(self.endQuoteChar[i]))
                           for i in range(len(self.endQuoteChar) - 1, 0, -1)]
            self.pattern += '|(?:' + ')|(?:'.join(partialEnds) + ')'
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape character from results
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at loc.

        Returns (end of match, matched text).  When unquoteResults is set the
        delimiters are stripped, escape characters are removed and escaped
        quote sequences are replaced by the end quote string.  Raises
        self.myException when there is no match.
        """
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        else:
            result = None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:
            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid string escape
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        """Return the inherited string form if available, else a description of the delimiters."""
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            # presumably the base __str__ fails while no name is assigned yet
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.

    Defined with a string containing all disallowed characters, and an
    optional minimum, maximum, and/or exact length.  The default value for
    min is 1 (a minimum value < 1 is not valid); the default values for max
    and exact are 0, meaning no maximum or exact length restriction.
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        """Store the disallowed characters and length limits.

        Raises ValueError when min < 1.  A positive exact overrides both min
        and max.
        """
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of characters that are not in notChars.

        Returns (end of run, matched text).  Raises self.myException when the
        first character is disallowed or the run is shorter than minLen.
        """
        if instring[loc] in self.notChars:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        loc += 1
        notchars = self.notChars
        # never scan past maxLen characters or the end of the input
        stop = min(start + self.maxLen, len(instring))
        while loc < stop and instring[loc] not in notchars:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__(self):
        """Return the inherited string form if available, else "!W:(<chars>)" (truncated to 4 chars)."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # presumably the base __str__ fails while no name is assigned yet
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
    as defined for the Word class."""

    # display names used to build the element name from the matched characters
    whiteStrs = {
        " ": "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure which whitespace characters are matched and the length limits.

        A positive exact overrides both min and max.  Every character of ws
        must be a key of White.whiteStrs (it is looked up to build the name).
        """
        super(White, self).__init__()
        self.matchWhite = ws
        # characters this element must match can no longer be skipped as
        # leading whitespace
        self.setWhitespaceChars("".join([c for c in self.whiteChars if c not in self.matchWhite]))
        self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of characters from matchWhite.

        Returns (end of run, matched text).  Raises self.myException when the
        first character is not in matchWhite or the run is shorter than minLen.
        """
        if instring[loc] not in self.matchWhite:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
class _PositionToken(Token):
    """Common base for tokens that test the parse position."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        # flags first, then the default name taken from the concrete class
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__(self, colno):
        """colno is the target column, compared against col(loc, instring)."""
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Advance over ignorables and whitespace until the target column is
        reached, a non-whitespace character is found, or the input ends."""
        if col(loc, instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (location of the target column, text skipped to get there).

        Raises ParseException when loc is already past the target column.
        """
        thiscol = col(loc, instring)
        if thiscol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + self.col - thiscol
        return newloc, instring[loc:newloc]
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__(self):
        super(LineStart, self).__init__()
        # newlines are significant here, so they must not be skipped as whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Step over a single newline found after the skippable whitespace.

        Note that the returned value is based on the incoming loc, not on the
        pre-parsed location.
        """
        preloc = super(LineStart, self).preParse(instring, loc)
        # NOTE(review): instring[preloc] raises IndexError if preloc reaches
        # the end of the input - confirm callers tolerate that
        if instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at a line start, else raise self.myException.

        A position counts as a line start when it is 0, when only skippable
        text precedes it, or when the previous character is a newline.
        """
        atLineStart = (loc == 0 or
                       loc == self.preParse(instring, 0) or
                       instring[loc - 1] == "\n")
        if not atLineStart:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__(self):
        super(LineEnd, self).__init__()
        # newlines are significant here, so they must not be skipped as whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline, or the exact end of the input.

        Returns (loc + 1, "\\n") for a newline and (loc + 1, []) at the end
        of the input; raises self.myException anywhere else, including when
        loc is already beyond the end of the input.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == instrlen:
            return loc + 1, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at the start of the input, else raise self.myException.

        A non-zero loc still counts as the start when pre-parsing from
        position 0 lands exactly on loc, i.e. everything before it is
        whitespace and ignorables.
        """
        if loc != 0 and loc != self.preParse(instring, 0):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only at or beyond the end of the input.

        Returns (loc + 1, []) exactly at the end and (loc, []) when loc is
        already past it; raises self.myException when input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc + 1, []
        # loc > instrlen: the three comparisons are exhaustive, so no
        # further fallback branch is needed
        return loc, []
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of wordChars
    (default=printables).  To emulate the \\b behavior of regular expressions,
    use WordStart(alphanums).  WordStart will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        # lookup structure built from the characters, used for membership tests
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at a word start, else raise self.myException.

        Position 0 always matches.  Elsewhere the previous character must not
        be a word character and the current character must be one.
        """
        if loc != 0:
            precededByWord = instring[loc - 1] in self.wordChars
            if precededByWord or instring[loc] not in self.wordChars:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of wordChars
    (default=printables).  To emulate the \\b behavior of regular expressions,
    use WordEnd(alphanums).  WordEnd will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        # lookup structure built from the characters, used for membership tests
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at a word end, else raise self.myException.

        At or beyond the end of the input (or for empty input) the match
        always succeeds.  Elsewhere the current character must not be a word
        character and the previous character must be one.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # At loc == 0 there is no previous character; testing
            # instring[loc - 1] there would wrap around to the LAST character
            # of the input, so position 0 is rejected explicitly.
            if (instring[loc] in self.wordChars or
                    loc == 0 or
                    instring[loc - 1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__(self, exprs, savelist=False):
        """Normalize exprs into the list self.exprs.

        A list is stored as-is (not copied), a string becomes a single
        Literal, any other iterable is converted with list(), and a
        non-iterable is wrapped in a one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            self.exprs = exprs
        elif isinstance(exprs, basestring):
            self.exprs = [Literal(exprs)]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def __getitem__(self, i):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append(self, other):
        """Add another expression in place, invalidating the cached string form."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the change does not leak into the original elements
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register an ignorable expression here and on every contained expression.

        A Suppress that is already registered is not added a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        # propagate whatever the base class actually stored
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the inherited string form if available, else "<ClassName>:(<exprs>)"."""
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            # presumably the base __str__ fails while no name is assigned yet
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline contained expressions and flatten directly nested
        expressions of the same class."""
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            # re-read the last element: the list may just have been rebuilt
            other = self.exprs[-1]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate to the base class implementation unchanged."""
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, then check this one for recursion."""
        # the default list is never mutated: a new list is built here
        tmp = validateTrace[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion([])
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once seen by And.parseImpl, failures of the
        following expressions are raised as ParseSyntaxException."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the method lookup
            # *after* Empty, so Empty.__init__ itself is not run here -
            # confirm this is intended.
            super(Empty, self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        """Build the sequence; whitespace handling is taken from the first expression."""
        super(And, self).__init__(exprs, savelist)
        # the sequence can be empty only if every element can be empty
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # use the normalized self.exprs, not the raw argument: exprs may be a
        # string or a one-shot iterable that cannot be indexed meaningfully
        first = self.exprs[0]
        self.setWhitespaceChars(first.whiteChars)
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, accumulating their tokens.

        Returns (end location, accumulated results).  After an _ErrorStop
        marker, a ParseBaseException or IndexError from a later expression is
        converted into ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that carry tokens or only named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support `self += other`; strings are wrapped in Literal."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, stopping after the
        first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        """Return the assigned name, else "{e1 e2 ...}"."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the '^' operator.
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # can be empty as soon as one alternative can be empty
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Trial-parse every alternative, then parse for real with the one
        whose trial match ended furthest.

        If no alternative matches, raises the failure that got furthest into
        the input, or a ParseException when there are no alternatives at all.
        """
        maxExcLoc = -1
        maxMatchLoc = -1
        maxException = None
        maxMatchExp = None
        for e in self.exprs:
            try:
                loc2 = e.tryParse(instring, loc)
            except ParseException as err:
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # running off the end of the input counts as failing there
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)
            else:
                # strict comparison: the first of equally long matches wins
                if loc2 > maxMatchLoc:
                    maxMatchLoc = loc2
                    maxMatchExp = e

        if maxMatchLoc < 0:
            if maxException is not None:
                raise maxException
            else:
                raise ParseException(instring, loc, "no defined alternatives to match", self)

        return maxMatchExp._parse(instring, loc, doActions)

    def __ixor__(self, other):
        """Support `self ^= other`; strings are wrapped in Literal."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return the assigned name, else "{e1 ^ e2 ...}"."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the '|' operator.
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # test the normalized list: the raw argument may be an iterator,
        # which is truthy even when it yields nothing
        if self.exprs:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        If none matches, raises the failure that got furthest into the input,
        or a ParseException when there are no alternatives at all.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # running off the end of the input counts as failing there
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Support `self |= other`; strings are wrapped in Literal."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return the assigned name, else "{e1 | e2 ...}"."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the '&' operator.
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        # can be empty only if every element can be empty
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # expression groups are classified lazily on the first parse
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Find an order in which the expressions match, then parse in that order.

        Trial parsing is repeated until a full pass matches nothing new.
        Raises ParseException when required expressions are still unmatched.
        Returns (end location, merged ParseResults).
        """
        if self.initExprGroups:
            # classify the contained expressions once, by wrapper type
            self.optionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse(instring, tmpLoc)
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # single-use expressions are consumed once matched
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a whole pass made no progress
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join([_ustr(e) for e in tmpReqd])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt]

        resultlist = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # names present in both: combine old and new values, and
            # re-assign them after the merge below
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k, v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__(self):
        """Return the assigned name, else "{e1 & e2 ...}"."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every contained expression for recursion."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__(self, expr, savelist=False):
        """Wrap a single expression (a string is converted to a Literal).

        When expr is not None, whitespace handling, index/empty flags,
        saveAsList, callPreparse and ignore expressions are taken from it.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # copy only when there is something to copy: calling .copy() before
        # the None check would fail for an element without an expression
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register an ignorable expression here and on the wrapped expression.

        A Suppress that is already registered is not added a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            # propagate whatever the base class actually stored
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if this element is already on the
        path; otherwise continue with the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, then check this one for recursion."""
        # the default list is never mutated: a new list is built here
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the inherited string form if available, else "<ClassName>:(<expr>)"."""
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # presumably the base __str__ fails while no name is assigned yet
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.

    FollowedBy does *not* advance the parsing position within the input
    string; it only verifies that the specified parse expression matches at
    the current position.  FollowedBy always returns a null token list.
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        # a pure lookahead consumes nothing
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Trial-parse the wrapped expression and report the unchanged location."""
        # any exception from the trial parse propagates to the caller
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace.  NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) when the wrapped expression fails to match at loc;
        raise self.myException when it does match."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent: this is the success path
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__(self):
        """Return the assigned name, else "~{<expr>}"."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression repeatedly until it stops matching.

        Never fails: a ParseException or IndexError simply ends the
        repetition.  Returns (end location, accumulated tokens), which is
        (loc, []) when nothing matched.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            hasIgnoreExprs = (len(self.ignoreExprs) > 0)
            while True:
                if hasIgnoreExprs:
                    preloc = self._skipIgnorables(instring, loc)
                else:
                    preloc = loc
                loc, tmptokens = self.expr._parse(instring, preloc, doActions)
                # keep results that carry tokens or only named entries
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name, else "[<expr>]..."."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the returned
        element as saveAsList."""
        ret = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        ret.saveAsList = True
        return ret
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression once (failure propagates), then
        repeatedly until it stops matching.

        Returns (end location, accumulated tokens).
        """
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            hasIgnoreExprs = (len(self.ignoreExprs) > 0)
            while True:
                if hasIgnoreExprs:
                    preloc = self._skipIgnorables(instring, loc)
                else:
                    preloc = loc
                loc, tmptokens = self.expr._parse(instring, preloc, doActions)
                # keep results that carry tokens or only named entries
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException, IndexError):
            # a failed repetition just ends the loop
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name, else "{<expr>}..."."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the returned
        element as saveAsList."""
        ret = super(OneOrMore, self).setResultsName(name, listAllMatches)
        ret.saveAsList = True
        return ret
class _NullToken(object):
    """Falsy placeholder object whose string form is empty."""
    def __bool__(self):
        return False
    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""

# sentinel used as the "no default given" marker for Optional
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
    A default return string can also be specified, if the optional expression
    is not found.
    """
    def __init__(self, exprs, default=_optionalNotMatched):
        """exprs is the wrapped expression; default is returned as the token
        when it does not match (omit it to return no token)."""
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression if possible.

        On ParseException or IndexError the location is left unchanged and
        the tokens are the default value (also stored under the wrapped
        expression's results name, if it has one) or an empty list.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is not _optionalNotMatched:
                if self.expr.resultsName:
                    tokens = ParseResults([self.defaultValue])
                    tokens[self.expr.resultsName] = self.defaultValue
                else:
                    tokens = [self.defaultValue]
            else:
                tokens = []
        return loc, tokens

    def __str__(self):
        """Return the assigned name, else "[<expr>]"."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """

    def __init__(self, other, include=False, ignore=None, failOn=None):
        """
        Parameters:
         - other - target expression to skip forward to
         - include - if true, the target expression is parsed as part of the match
         - ignore - optional expression that is stepped over before each attempt
         - failOn - optional expression (or string, wrapped in a Literal) whose
           presence at the current position aborts the skip
        """
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time from loc until the target expression matches.

        Returns (loc, [skippedText]), or, when include was requested and the
        target produced tokens, (loc, [ParseResults(skippedText) + targetTokens]).
        Raises self.myException (with loc and pstr updated) if the end of the
        string is passed without a match, and re-raises the ParseException
        created when the failOn expression is found.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # failOn matched here: flag it so the handler below
                        # re-raises instead of advancing
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False

                if self.ignoreExpr is not None:
                    # step over as many consecutive ignorable matches as there are
                    # (the debugging print that used to live here wrote to stdout
                    # on every match and has been removed)
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break

                # probe for the target without running parse actions
                expr._parse(instring, loc, doActions=False, callPreParse=False)
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc, mat = expr._parse(instring, loc, doActions, callPreParse=False)
                    if mat:
                        skipRes = ParseResults(skipText)
                        skipRes += mat
                        return loc, [skipRes]
                    else:
                        return loc, [skipText]
                else:
                    return loc, [skipText]
            except (ParseException, IndexError):
                if failParse:
                    raise
                else:
                    loc += 1

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """

    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install other as the wrapped expression and mirror its parsing attributes; returns None."""
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        target = self.expr
        self.mayIndexError = target.mayIndexError
        self.setWhitespaceChars(target.whiteChars)
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Turn off whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the wrapped expression once; the flag is set first so recursion terminates."""
        if self.streamlined:
            return self
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression unless self is already on the trace, then check recursion."""
        if self not in validateTrace:
            # extend a copy so the caller's list (and the shared default) is never mutated
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        # While the wrapped expression is being rendered, masquerade as
        # _ForwardNoRecurse so that a reference back to self prints as "..."
        # rather than recursing without end.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy normally when an expression is set; otherwise return a new Forward that wraps self."""
        if self.expr is None:
            ret = Forward()
            ret << self
            return ret
        return super(Forward, self).copy()
class _ForwardNoRecurse(Forward):
    """Stand-in class whose string form is a fixed "..." (Forward.__str__ swaps to it temporarily)."""

    def __str__(self):
        return "..."
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base class;
        # saveAsList always starts out False here.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""

    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list holding the upper-cased form of every token."""
        # Call the method on each token instead of the Python 2-only
        # string.upper() module function; for str tokens the result is the same.
        return [tok.upper() for tok in tokenlist]
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignorable expression; returns self.

        When adjacent, ParserElement.ignore is called directly instead of the
        inherited override.
        """
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super(Combine, self).ignore(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Collapse the matched tokens into one string token, keeping any named results."""
        # start from a copy with its list items removed
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # nest the whole result as a single item
        grouped = [tokenlist]
        return grouped
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """

    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a keyed entry to tokenlist for every non-empty item, using its first token as key."""
        for idx, entry in enumerate(tokenlist):
            fieldCount = len(entry)
            if fieldCount == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = _ustr(entry[0]).strip()

            if fieldCount == 1:
                # key only: store an empty string as its value
                value = ""
            elif fieldCount == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                # value is everything after the key
                remainder = entry.copy()
                del remainder[0]
                hasNames = isinstance(remainder, ParseResults) and remainder.keys()
                if len(remainder) != 1 or hasNames:
                    value = remainder
                else:
                    value = remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, idx)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        return []

    def suppress(self):
        # already suppressing, so there is nothing further to wrap
        return self
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action on the first call; raise ParseException on later calls."""
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # marked as used only after the wrapped action returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an ">>entering" line to sys.stderr before
    calling the parse action and a "<<leaving" line afterwards, showing either
    the return value or the exception raised (which is then re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)

    def z(*paArgs):
        # __name__ instead of the Python 2-only func_name attribute
        thisFunc = f.__name__
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is treated as the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() avoids the version-specific "except X, name" /
            # "except X as name" spellings
            exc = sys.exc_info()[1]
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret

    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    dlName = "".join([_ustr(expr), " [", _ustr(delim), " ", _ustr(expr), "]..."])
    if combine:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)
def countedArray(expr):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # Rebuild the array expression from the count that was just parsed.
        n = int(t[0])
        if n:
            counted = Group(And([expr] * n))
        else:
            counted = Group(empty)
        arrayExpr << counted
        # returning an empty list drops the count token from the results
        return []

    countExpr = Word(nums).setName("arrayLen")
    countExpr = countExpr.setParseAction(countFieldParseAction, callDuringTry=True)
    return (countExpr + arrayExpr)
def _flatten(L):
    """Return a flat list of the non-list items nested anywhere inside L.

    Only objects whose type is exactly ``list`` are descended into; any other
    value (tuples and list subclasses included) is kept as a single item. A
    non-list argument is returned wrapped in a one-element list.

    The walk is iterative: the previous version recursed on ``L[1:]``, so its
    recursion depth grew with the list length and it copied the tail at each step.
    """
    if type(L) is not list:
        return [L]

    flat = []
    pending = [iter(L)]       # iterators for the lists currently being walked
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # descend; the outer iterator resumes when this one is exhausted
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            pending.pop()
    return flat
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # Re-point the repeater at whatever expr has just matched.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            flatTokens = _flatten(t.asList())
            rep << And([Literal(tok) for tok in flatTokens])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        # Remember what expr matched and make the repeater insist on the same tokens.
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in s and spell newline/tab as \\n and \\t."""
    # the backslash comes first so the escapes added for the others are not re-escaped
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       SyntaxWarning and is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    if isinstance(strs, (list, tuple)):
        symbols = list(strs[:])
    elif isinstance(strs, basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                      SyntaxWarning, stacklevel=2)
        # previously left unbound, which turned the warning into a NameError below
        symbols = []

    # Drop duplicates and move any symbol that is masked by an earlier, shorter
    # prefix in front of that prefix, so the longer alternative is tried first.
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1:]):
            if isequal(other, cur):
                del symbols[i + j + 1]
                break
            elif masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            # nothing conflicted with symbols[i]; move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # every symbol is a single character: one character class suffices
                return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols]))
            else:
                return Regex("|".join([re.escape(sym) for sym in symbols]))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst([parseElementClass(sym) for sym in symbols])
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional asString argument is passed as False, then the return value is a
       ParseResults containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to originalTextFor contains expressions with defined
       results names, you must set asString to False if you want to preserve those
       results name values."""
    # An Empty whose parse action returns the current location; placed on
    # both sides of expr it records where the match starts and ends.
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # replace the token list with the raw text, then drop the two markers
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    matchExpr.setParseAction(extractText)
    return matchExpr
# convenience constants for positional expressions
# (module-level, pre-named instances created with no constructor arguments)
empty = Empty ( ) . setName ( " empty " )
lineStart = LineStart ( ) . setName ( " lineStart " )
lineEnd = LineEnd ( ) . setName ( " lineEnd " )
stringStart = StringStart ( ) . setName ( " stringStart " )
stringEnd = StringEnd ( ) . setName ( " stringEnd " )
# Escaped punctuation: a Word built from _bslash and a punctuation set with
# exact=2; the parse action returns t[0][1], the second character of the token.
_escapedPunc = Word ( _bslash , r " \ []-*.$+^?()~ " , exact = 2 ) . setParseAction ( lambda s , l , t : t [ 0 ] [ 1 ] )
2011-08-18 21:58:44 +00:00
# The characters of `printables` with the ones in the raw-string filter removed
# (per the name, backslash; the filter literal also lists ']').
_printables_less_backslash = " " . join ( [ c for c in printables if c not in r " \ ] " ] )
2010-02-16 04:09:07 +00:00
# Escaped hex character: suppressed backslash + "0x" prefix, then hex digits;
# the digits are read base 16 and converted with unichr.
_escapedHexChar = Combine ( Suppress ( _bslash + " 0x " ) + Word ( hexnums ) ) . setParseAction ( lambda s , l , t : unichr ( int ( t [ 0 ] , 16 ) ) )
# Escaped octal character: suppressed backslash, then a Word starting with "0"
# followed by octal digits; read base 8 and converted with unichr.
_escapedOctChar = Combine ( Suppress ( _bslash ) + Word ( " 0 " , " 01234567 " ) ) . setParseAction ( lambda s , l , t : unichr ( int ( t [ 0 ] , 8 ) ) )
# One character of a bracket expression: any escaped form, tried in this order,
# else a single character drawn from _printables_less_backslash.
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word ( _printables_less_backslash , exact = 1 )
# A range: two single characters around a suppressed dash, grouped together.
_charRange = Group ( _singleChar + Suppress ( " - " ) + _singleChar )
# Whole bracket expression: "[", optional "^" (results name "negate"), one or
# more ranges/single characters (results name "body"), then "]".
_reBracketExpr = Literal ( " [ " ) + Optional ( " ^ " ) . setResultsName ( " negate " ) + Group ( OneOrMore ( _charRange | _singleChar ) ) . setResultsName ( " body " ) + " ] "
2011-08-18 21:58:44 +00:00
# Expand one parsed part: a ParseResults is treated as a (first, last) pair and
# becomes every character from ord(p[0]) through ord(p[1]) inclusive; anything
# else is returned unchanged.
# NOTE(review): because of the and/or form, an empty expansion (first > last)
# falls through and returns p itself rather than an empty string.
_expanded = lambda p : ( isinstance ( p , ParseResults ) and ' ' . join ( [ unichr ( c ) for c in range ( ord ( p [ 0 ] ) , ord ( p [ 1 ] ) + 1 ) ] ) or p )
2010-02-16 04:09:07 +00:00
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if s cannot be parsed or expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Best effort: malformed input yields "". Catching Exception rather than
        # using a bare except lets KeyboardInterrupt/SystemExit propagate.
        return ""
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.
    """
    def verifyCol(strg, locn, toks):
        # accept silently when the match sits in column n
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().
    """
    def _replFunc(*args):
        # whatever was matched, answer with the fixed replacement
        replacement = [replStr]
        return replacement
    return _replFunc
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and last characters (the surrounding quote marks)
    return quoted[1:-1]
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    return [_ustr(tok).upper() for tok in t]
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    return [_ustr(tok).lower() for tok in t]
def keepOriginalText(s, startLoc, t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    else:
        # swap the parsed tokens for the raw slice of the input
        originalText = s[startLoc:endloc]
        del t[:]
        t += ParseResults(originalText)
        return t
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # that frame's local variable "loc" is the end location
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records whichever way we leave
        del fstack
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr, basestring):
        resname = tagStr
        # tag names are matched case-insensitively unless building XML tags
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: names are lower-cased; the value is optional and may be unquoted
        printablesLessRAbrack = "".join([c for c in printables if c not in ">"])
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        attribute = Group(tagAttrName.setParseAction(downcaseTokens) +
                          Optional(Suppress("=") + tagAttrValue))

    # "empty" result: True when the tag ends with "/", otherwise False
    emptyFlag = Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attribute)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name with ':' removed and each part capitalized
    camelName = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % tagStr)

    return openTag, closeTag
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    openTag, closeTag = _makeTags(tagStr, False)
    return openTag, closeTag
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    openTag, closeTag = _makeTags(tagStr, True)
    return openTag, closeTag
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
       """
    # positional (name, value) pairs win over keyword arguments when both are given
    attrs = [(k, v) for k, v in (args or attrDict.items())]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                         (attrName, actual, attrValue))
    return pa

# unique marker meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()
# Associativity markers for operatorPrecedence: two distinct unique objects
# stored as attributes of a _Constants instance.
opAssoc = _Constants ( )
opAssoc . LEFT = object ( )
opAssoc . RIGHT = object ( )
def operatorPrecedence(baseExpr, opList):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
    """
    ret = Forward()
    # innermost level: a base operand or a parenthesized full expression
    lastExpr = baseExpr | (Suppress('(') + ret + Suppress(')'))

    for i, operDef in enumerate(opList):
        # pad with None so the parse action member may be omitted
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr

        thisExpr = Forward()

        if rightLeftAssoc == opAssoc.LEFT:
            # left-associative levels repeat against the previous (tighter) level
            if arity == 1:
                lookahead = FollowedBy(lastExpr + opExpr)
                grouped = Group(lastExpr + OneOrMore(opExpr))
            elif arity == 2:
                if opExpr is not None:
                    lookahead = FollowedBy(lastExpr + opExpr + lastExpr)
                    grouped = Group(lastExpr + OneOrMore(opExpr + lastExpr))
                else:
                    lookahead = FollowedBy(lastExpr + lastExpr)
                    grouped = Group(lastExpr + OneOrMore(lastExpr))
            elif arity == 3:
                lookahead = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
                grouped = Group(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative levels recurse into this same level (thisExpr)
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = FollowedBy(opExpr.expr + thisExpr)
                grouped = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    lookahead = FollowedBy(lastExpr + opExpr + thisExpr)
                    grouped = Group(lastExpr + OneOrMore(opExpr + thisExpr))
                else:
                    lookahead = FollowedBy(lastExpr + thisExpr)
                    grouped = Group(lastExpr + OneOrMore(thisExpr))
            elif arity == 3:
                lookahead = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
                grouped = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")

        matchExpr = lookahead + grouped
        if pa:
            matchExpr.setParseAction(pa)

        # this level matches its operator form, or else falls back to the tighter level
        thisExpr << (matchExpr | lastExpr)
        lastExpr = thisExpr

    ret << lastExpr
    return ret
# Quoted-string expressions. Each pattern's body alternates between: a character
# that is not the quote, newline, carriage return or backslash; a doubled quote;
# a backslash-x hex escape; or a backslash followed by any character.
dblQuotedString = Regex ( r ' " (?:[^ " \ n \ r \\ ]|(?: " " )|(?: \\ x[0-9a-fA-F]+)|(?: \\ .))* " ' ) . setName ( " string enclosed in double quotes " )
sglQuotedString = Regex ( r " ' (?:[^ ' \ n \ r \\ ]|(?: ' ' )|(?: \\ x[0-9a-fA-F]+)|(?: \\ .))* ' " ) . setName ( " string enclosed in single quotes " )
# Either of the two forms above, as a single alternation.
quotedString = Regex ( r ''' (?: " (?:[^ " \ n \ r \ \ ]|(?: " " )|(?: \ \ x[0-9a-fA-F]+)|(?: \ \ .))* " )|(?: ' (?:[^ ' \ n \ r \ \ ]|(?: ' ' )|(?: \ \ x[0-9a-fA-F]+)|(?: \ \ .))* ' ) ''' ) . setName ( " quotedString using single or double quotes " )
# A quoted string with a leading 'u' prefix, combined into one token.
unicodeString = Combine ( _L ( ' u ' ) + quotedString . copy ( ) )
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        # Default content: a run of characters that are neither whitespace
        # nor a delimiter, with the resulting token stripped.
        stripToken = lambda t: t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims:
            if ignoreExpr is not None:
                content = (Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(opener + closer + whiteChars, exact=1))
                            ).setParseAction(stripToken))
            else:
                content = (empty + CharsNotIn(opener + closer + whiteChars
                            ).setParseAction(stripToken))
        else:
            # multi-character delimiters have to be excluded by negative lookahead
            if ignoreExpr is not None:
                content = (Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars, exact=1))
                            ).setParseAction(stripToken))
            else:
                content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars, exact=1))
                            ).setParseAction(stripToken))

    # the nested expression refers to itself, hence the Forward
    ret = Forward()
    if ignoreExpr is not None:
        ret << Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer))
    else:
        ret << Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
         is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
         (multiple statementWithIndentedBlock expressions within a single
         grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
         the current level; set to False for block of left-most statements
         (default=True)

    A valid block must contain at least one blockStatement.

    Returns a Group expression matching the whole block.  As a side effect,
    blockStatementExpr is modified in place so that it ignores
    _bslash + LineEnd().
    """
    # NOTE(review): the message strings and the whitespace-character string
    # below are reproduced exactly as found in the file, including their
    # leading/trailing spaces -- confirm that padding is intended.

    def checkPeerIndent(s, l, t):
        # Nothing to verify once the location is at/after the end of input.
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            # Deeper than the current level is reported as a fatal error;
            # shallower is an ordinary (recoverable) parse failure.
            if curCol > indentStack[-1]:
                raise ParseFatalException(s, l, " illegal nesting ")
            raise ParseException(s, l, " not a peer entry ")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            # Entering a deeper level: remember its column.
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, " not a subentry ")

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        # Both the current and the enclosing level are inspected, so at
        # least two entries are required; with fewer, this is reported as a
        # normal parse failure instead of escaping as an IndexError.
        if not (len(indentStack) >= 2
                and curCol < indentStack[-1]
                and curCol <= indentStack[-2]):
            raise ParseException(s, l, " not an unindent ")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars(" \t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # One or more statements, each required to sit at the current level.
    if indent:
        smExpr = Group(
            Optional(NL)
            + FollowedBy(blockStatementExpr)
            + INDENT
            + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL)))
            + UNDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL)))
        )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
# NOTE(review): the string/regex literals in this section are shown with
# spaces just inside the quotes and around backslashes; that looks like an
# extraction artifact rather than intended pattern text -- verify against a
# clean copy before relying on them.

# Character sets expanded by srange() from bracketed range specifications
# (judging by the names: 8-bit alphabetic and punctuation characters).
alphas8bit = srange ( r " [ \ 0xc0- \ 0xd6 \ 0xd8- \ 0xf6 \ 0xf8- \ 0xff] " )
punc8bit = srange ( r " [ \ 0xa1- \ 0xbf \ 0xd7 \ 0xf7] " )
# Open/close tag expressions produced by makeHTMLTags() for a tag-name Word
# that starts with a letter and continues with alphanumerics plus the extra
# characters listed.
anyOpenTag , anyCloseTag = makeHTMLTags ( Word ( alphas , alphanums + " _: " ) )
# An entity reference: the "&" literal, one of the listed entity names
# (stored under the results name "entity"), and a closing ";", combined into
# a single token.
commonHTMLEntity = Combine ( _L ( " & " ) + oneOf ( " gt lt amp nbsp quot " ) . setResultsName ( " entity " ) + " ; " ) . streamline ( )
# Entity name -> replacement character, built by zipping the names with the
# characters of the second string.
_htmlEntityMap = dict ( zip ( " gt lt amp nbsp quot " . split ( ) , ' ><& " ' ) )
# Callable taking a token object t: yields the mapped value for t.entity
# when it is present in the map (and truthy), otherwise None (and/or idiom).
replaceHTMLEntity = lambda t : t . entity in _htmlEntityMap and _htmlEntityMap [ t . entity ] or None
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex ( r " / \ *(?:[^*]* \ *+)+?/ " ) . setName ( " C style comment " )
htmlComment = Regex ( r " <!--[ \ s \ S]*?--> " )
# Regex-based expression with leaveWhitespace() applied, so leading
# whitespace is not skipped before matching.
restOfLine = Regex ( r " .* " ) . leaveWhitespace ( )
dblSlashComment = Regex ( r " \ / \ /( \\ \ n|.)* " ) . setName ( " // comment " )
cppStyleComment = Regex ( r " /(?: \ *(?:[^*]* \ *+)+?/|/[^ \ n]*(?: \ n[^ \ n]*)*?(?:(?<! \\ )| \ Z)) " ) . setName ( " C++ style comment " )
# Alias: the very same expression object as cppStyleComment.
javaStyleComment = cppStyleComment
pythonStyleComment = Regex ( r " #.* " ) . setName ( " Python style comment " )
2011-08-18 21:58:44 +00:00
# String assembled from the characters of printables that pass the
# inequality test against the comma literal; used below as the character set
# for comma-separated items.
# NOTE(review): the literals here appear space-padded (possible extraction
# artifact) -- confirm the separator and the compared value.
_noncomma = " " . join ( [ c for c in printables if c != " , " ] )
2010-02-16 04:09:07 +00:00
# One item of a comma-separated list: one or more runs of _noncomma
# characters, each optionally followed by a run of the listed whitespace
# characters provided that run is not directly followed by a comma or the
# end of the line; everything is combined into a single token.
_commasepitem = Combine ( OneOrMore ( Word ( _noncomma ) +
Optional ( Word ( " \t " ) +
~ Literal ( " , " ) + ~ LineEnd ( ) ) ) ) . streamline ( ) . setName ( " commaItem " )
# A delimitedList whose entries are either a quotedString or a
# _commasepitem; a missing entry yields the given default value.
commaSeparatedList = delimitedList ( Optional ( quotedString | _commasepitem , default = " " ) ) . setName ( " commaSeparatedList " )
if __name__ == "__main__":
    # Self-test/demo: builds a tiny "select ... from ..." grammar and runs
    # it against a handful of sample statements, printing either the parsed
    # results or the location of the parse error.
    # NOTE(review): the string literals below are reproduced exactly as
    # found in the file, including leading/trailing spaces -- confirm that
    # padding is intended, since it affects what the literals match.

    def test(teststring):
        """Parse teststring with simpleSQL and print the outcome.

        On success the token list, the named "columns"/"tables" results and
        the XML rendering are printed; on a ParseBaseException the offending
        line, a caret under the error column and the exception are printed.
        A blank line is printed after either outcome.
        """
        # Local import keeps this demo self-contained.
        import sys
        try:
            tokens = simpleSQL.parseString(teststring)
            tokenlist = tokens.asList()
            print(teststring + " -> " + str(tokenlist))
            print(" tokens = " + str(tokens))
            print(" tokens.columns = " + str(tokens.columns))
            print(" tokens.tables = " + str(tokens.tables))
            print(tokens.asXML(" SQL ", True))
        except ParseBaseException:
            # sys.exc_info() is used instead of "except X, err" / "except X
            # as err" so this block is valid syntax on every Python version.
            err = sys.exc_info()[1]
            print(teststring + " -> ")
            print(err.line)
            print(" " * (err.column - 1) + " ^ ")
            print(err)
        # print("") rather than print(): under the Python 2 print statement
        # a bare print() would output "()" instead of an empty line.
        print("")

    selectToken = CaselessLiteral(" select ")
    fromToken = CaselessLiteral(" from ")

    ident = Word(alphas, alphanums + " _$ ")
    columnName = delimitedList(ident, " . ", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName))  #.setName("columns")
    tableName = delimitedList(ident, " . ", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName))  #.setName("tables")
    simpleSQL = (
        selectToken
        + (' * ' | columnNameList).setResultsName(" columns ")
        + fromToken
        + tableNameList.setResultsName(" tables ")
    )

    test(" SELECT * from XYZZY, ABC ")
    test(" select * from SYS.XYZZY ")
    test(" Select A from Sys.dual ")
    test(" Select AA,BB,CC from Sys.dual ")
    test(" Select A, B, C from Sys.dual ")
    test(" Select A, B, C from Sys.dual ")
    test(" Xelect A, B, C from Sys.dual ")
    test(" Select A, B, C frox Sys.dual ")
    test(" Select ")
    test(" Select ^^^ frox Sys.dual ")
    test(" Select A, B, C from Sys.dual, Table2 ")