def get(self, key, defaultValue=None):
    """
    Look up a named result, falling back to a default when the name is absent.

    Works like C{dict.get()}: when C{key} is present in these results its
    value is returned, otherwise C{defaultValue} is returned (C{None} if no
    C{defaultValue} was supplied).

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
# Source code examples using the Python class Word()
def insert( self, index, insStr ):
    """
    Insert a new element at position C{index} in the list of parsed tokens.

    Similar to C{list.insert()}. After the token list is updated, the
    positions stored alongside the named results are adjusted so that
    entries located after the insertion point move up by one.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)
    # re-seat every named-result entry; positions beyond the insertion index
    # grow by one (the boolean comparison contributes 0 or 1)
    for occurrences in self.__tokdict.values():
        for slot, (value, position) in enumerate(occurrences):
            shifted = position + (position > index)
            occurrences[slot] = _ParseResultsWithOffset(value, shifted)
def extend( self, itemseq ):
    """
    Append a sequence of elements to the end of this ParseResults' token list.

    When C{itemseq} is itself a C{ParseResults} it is merged in with C{+=};
    any other iterable is appended to the underlying token list.

    Example::
        patt = OneOrMore(Word(alphas))

        # use a parse action to append the reverse of the matched strings, to make a palindrome
        def make_palindrome(tokens):
            tokens.extend(reversed([t[::-1] for t in tokens]))
            return ''.join(tokens)
        print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
    """
    if not isinstance(itemseq, ParseResults):
        self.__toklist.extend(itemseq)
    else:
        self += itemseq
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results as a nested list, using the C{pprint} module.

    The results are first converted with C{asList()}; any additional
    positional or keyword arguments are passed straight through to
    C{pprint.pprint} (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}).

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    nested_tokens = self.asList()
    pprint.pprint(nested_tokens, *args, **kwargs)
# add support for pickle protocol
def copy( self ):
    """
    Return a copy of this C{ParserElement}.

    Useful for attaching different parse actions to the same parsing
    pattern. The copy receives its own C{parseAction} and C{ignoreExprs}
    lists, so changing them on the copy leaves the original untouched.
    If C{copyDefaultWhiteChars} is set, the copy's whitespace characters
    are reset to C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy( self )
    # the shallow copy shares these lists with the original; slice them so
    # each element owns an independent list
    duplicate.ignoreExprs = self.ignoreExprs[:]
    duplicate.parseAction = self.parseAction[:]
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
def __add__(self, other ):
    """
    Implement the C{+} operator, producing an C{L{And}} of both operands.

    A string operand is first converted with
    C{ParserElement._literalStringClass} (L{Literal} by default). If the
    operand is still not a C{ParserElement}, a C{SyntaxWarning} is issued
    and C{None} is returned.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    # unsupported operand: warn at the caller's location and signal failure
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then
    C{listAllMatches} will be passed as C{True}.

    If C{name} is omitted (left as C{None}), this is the same as calling
    C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
def ignore( self, other ):
    """
    Register an expression to be skipped (e.g., comments) during matching.

    May be called repeatedly to define multiple comment or other ignorable
    patterns. A string argument is wrapped in C{Suppress}. A C{Suppress}
    instance is added only if it is not already registered; any other
    expression is copied and wrapped in a new C{Suppress} before being added.
    Returns C{self} so calls can be chained.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
def __str__( self ):
    """
    Return the display string for this C{Word}.

    The base-class representation is used when it can be produced; if that
    raises, a C{"W:(...)"} form is built from the original character sets
    (each abbreviated to its first four characters plus C{"..."} when
    longer) and cached in C{strRepr}.
    """
    try:
        return super(Word,self).__str__()
    except Exception:
        # deliberate best-effort: fall through to the locally built form
        pass

    if self.strRepr is not None:
        return self.strRepr

    def charsAsStr(s):
        return s[:4]+"..." if len(s)>4 else s

    if ( self.initCharsOrig == self.bodyCharsOrig ):
        self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
    else:
        self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
    return self.strRepr
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.

    By default, the list elements and delimiters can have intervening
    whitespace and comments, and the matching tokens are returned as a list
    with the delimiters suppressed. Passing C{combine=True} returns the
    matching tokens as a single token string, delimiters included.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.

    The resulting group carries these results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # an Empty expression whose parse action returns the current location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startMarker = locator("locn_start")
    matched = expr("value")
    # the end marker is a copy with leaveWhitespace() applied
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + matched + endMarker)
# convenience constants for positional expressions
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction. Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string. If the input cannot be
    expanded, an empty string is returned.

    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(p):
        # plain entries pass through; a ParseResults pair is treated as the
        # inclusive endpoints of a character range
        if not isinstance(p,ParseResults):
            return p
        return ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))

    try:
        parts = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in parts)
    except Exception:
        return ""
def append( self, item ):
    """
    Add a single element to the end of this ParseResults' list of tokens.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to compute the sum of the parsed integers, and add it to the end
        def append_sum(tokens):
            tokens.append(sum(map(int, tokens)))
        print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
    """
    tokens = self.__toklist
    tokens.append(item)
def asDict( self ):
    """
    Return the named parse results as a nested dictionary.

    Values that are themselves C{ParseResults} are converted recursively:
    to a dict when they have keys, otherwise to a list of converted items.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    def toItem(obj):
        if not isinstance(obj, ParseResults):
            return obj
        if obj.haskeys():
            return obj.asDict()
        return [toItem(v) for v in obj]

    # pick the item iterator matching the running Python major version
    item_fn = self.items if PY_3 else self.iteritems
    return dict((k,toItem(v)) for k,v in item_fn())
def getName(self):
    r"""
    Return the results name for this token expression.

    Useful when several different expressions might match at a particular
    location. The name is resolved in this order: this object's own name;
    a lookup through the parent results (C{None} if the parent reference
    no longer resolves); the sole key of the token dictionary when there
    is exactly one token and one name whose first recorded position is
    C{0} or C{-1}; otherwise C{None}.

    Example::
        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])
    prints::
        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    if self.__name:
        return self.__name

    if self.__parent:
        # __parent is called to obtain the actual parent object, which may be gone
        par = self.__parent()
        return par.__lookup(self) if par else None

    singleNamedToken = (len(self) == 1 and
                        len(self.__tokdict) == 1 and
                        next(iter(self.__tokdict.values()))[0][1] in (0,-1))
    if singleNamedToken:
        return next(iter(self.__tokdict.keys()))
    return None
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the default set of whitespace characters.

    The new value is stored on C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        # default whitespace chars are space, <TAB> and newline
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
def setName( self, name ):
    """
    Assign a name to this expression; makes debugging and exception messages clearer.

    Also rebuilds C{errmsg} as C{"Expected <name>"} and, when this element
    has an C{exception} attribute, updates that exception's message too.
    Returns C{self} so calls can be chained.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    if not hasattr(self,"exception"):
        return self
    self.exception.msg = self.errmsg
    return self
def setParseAction( self, *fns, **kwargs ):
    """
    Define one or more actions to perform when successfully matching parse element definition.

    Any previously set parse actions are replaced. Parse action fn is a
    callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # each callable is wrapped once by _trim_arity at registration time
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
def addCondition(self, *fns, **kwargs):
    """
    Add one or more boolean predicate functions to this expression's list of parse actions.

    See L{I{setParseAction}<setParseAction>} for function call signatures.
    Unlike C{setParseAction}, functions passed to C{addCondition} need to
    return boolean success/fail of the condition. Each predicate becomes its
    own parse action, so every predicate passed in a single call is checked.
    Returns C{self} so calls can be chained.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
       (default C{"failed user-defined condition"})
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, turns on C{callDuringTry} for this expression
       (an already-enabled flag is left enabled)

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def makeConditionAction(condition):
        # Build the parse action inside its own scope so that each action is
        # bound to its own predicate. Defining the closure directly in the
        # loop below would late-bind the loop variable, making every action
        # evaluate only the last predicate.
        # The predicate is wrapped once here, as setParseAction does, rather
        # than on every invocation.
        condition = _trim_arity(condition)
        def pa(s,l,t):
            if not bool(condition(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(makeConditionAction(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression.

    Collects the tokens of every match reported by C{scanString} into a
    single C{ParseResults}. May be called with optional C{maxMatches}
    argument, to clip searching after 'n' matches are found.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        matches = [ tokens for tokens,_start,_end in self.scanString( instring, maxMatches ) ]
        return ParseResults(matches)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
def setDebug( self, flag=True ):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable, False to disable. Returns C{self}.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    # enabling installs the three default debug actions
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value;
    a copy of it is used, so the caller's expression is not modified. When
    omitted, a C{Word(nums)} converted to C{int} is used.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        n = t[0]
        # A real conditional expression replaces the "n and X or Y" idiom,
        # which would silently select Y whenever X evaluated as false.
        # Plain "<<" is used on purpose: "<<=" would make arrayExpr a local
        # name inside this nested function.
        arrayExpr << (Group(And([expr]*n)) if n else Group(empty))
        # returning an empty list suppresses the count token itself
        return []
    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    else:
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.

    Note that a parse action is added to C{expr} itself.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what the original expression matched ...
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # ... and reject the repeat unless it produced the same tokens
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def replaceWith(replStr):
    """
    Build a parse action that always returns the single literal value C{replStr}.

    The returned callable takes the full C{(s, loc, toks)} parse-action
    signature, ignores all three, and returns C{[replStr]}. Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda _instring, _loc, _toks: [replStr]
def pop( self, *args, **kwargs):
    """
    Remove and return the item at the specified index (default=C{last}).

    Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
    argument or an integer argument, it will use C{list} semantics
    and pop tokens from the list of parsed tokens. If passed a
    non-integer argument (most likely a string), it will use C{dict}
    semantics and pop the corresponding value from any defined
    results names. A second default return value argument is
    supported, just as in C{dict.pop()}; it may also be given as the
    keyword C{default}. Any other keyword raises C{TypeError}.

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Use pop() in a parse action to remove named result (note that corresponding value is not
        # removed from list form of results)
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())
    prints::
        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    positional = args if args else [-1]
    for option,value in kwargs.items():
        if option != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % option)
        positional = (positional[0], value)

    target = positional[0]
    removable = (isinstance(target, int) or
                 len(positional) == 1 or
                 target in self)
    if not removable:
        # non-integer key that is absent, with a default supplied
        return positional[1]

    removed = self[target]
    del self[target]
    return removed
def dump(self, indent='', depth=0, full=True):
    """
    Diagnostic method for listing out the contents of a C{ParseResults}.

    Accepts an optional C{indent} argument so that this string can be embedded
    in a nested display of other data. The first line is always the list
    form of the results. When C{full} is true, it is followed either by the
    named results in sorted key order or, if there are no names but some
    items are nested C{ParseResults}, by an indexed listing of every item.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    pieces = [ indent+_ustr(self.asList()) ]
    if not full:
        return "".join(pieces)

    if self.haskeys():
        namedItems = sorted((str(k), v) for k,v in self.items())
        for key,value in namedItems:
            pieces.append('\n')
            pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
            if not isinstance(value,ParseResults):
                pieces.append(repr(value))
            elif value:
                # non-empty nested results are dumped recursively, one level deeper
                pieces.append( value.dump(indent,depth+1) )
            else:
                pieces.append(_ustr(value))
    elif any(isinstance(vv,ParseResults) for vv in self):
        for i,vv in enumerate(self):
            if isinstance(vv,ParseResults):
                rendered = vv.dump(indent,depth+1)
            else:
                rendered = _ustr(vv)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),rendered))

    return "".join(pieces)
def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # after the pre-parse step, only end-of-string may remain
            loc = self.preParse( instring, loc )
            endOfInput = Empty() + StringEnd()
            endOfInput._parse( instring, loc )
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return tokens
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """
    Set up a C{Word} from its allowed character sets and length limits.

    Parameters:
     - initChars = characters allowed in the first position
     - bodyChars = characters allowed after the first position; when empty
       or omitted, C{initChars} is used for the body as well
     - min = minimum length; values below 1 raise C{ValueError}
     - max = maximum length; 0 (the default) means no explicit maximum
     - exact = when greater than 0, sets both the minimum and maximum length
     - asKeyword = when true, the generated regular expression is wrapped
       in word-boundary anchors
     - excludeChars = characters removed from C{initChars} and C{bodyChars}

    A regular expression is prepared only when neither character set
    contains a space and C{min}, C{max} and C{exact} are all left at their
    defaults; if it fails to compile, C{self.re} is set to C{None}.
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)

    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    # the body falls back to the (already filtered) initial characters
    bodySource = bodyChars if bodyChars else initChars
    self.bodyCharsOrig = bodySource
    self.bodyChars = set(bodySource)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    # the character-set attributes above are assigned before the name is derived from this element
    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    if ' ' in self.initCharsOrig+self.bodyCharsOrig or not (min==1 and max==0 and exact==0):
        return

    if self.bodyCharsOrig == self.initCharsOrig:
        self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
    elif len(self.initCharsOrig) == 1:
        self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                     _escapeRegexRangeChars(self.bodyCharsOrig),)
    else:
        self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
    if self.asKeyword:
        self.reString = r"\b"+self.reString+r"\b"
    try:
        self.re = re.compile( self.reString )
    except Exception:
        self.re = None
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
    All trace output is written to C{sys.stderr}; an exception raised by the
    parse action is re-raised after being reported.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    def z(*paArgs):
        label = f.__name__
        # the last three arguments are always (string, location, tokens)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument: prefix the label with its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        report = sys.stderr.write
        report( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            report( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        report( "<<leaving %s (ret: %r)\n" % (label,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.

    If any additional args are passed, they are forwarded to the given function as additional arguments after
    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
    parsed data to an integer using base 16. The returned parse action is
    named after C{func} (its C{__name__}, else its class name, else C{str(func)}).

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        return [func(tokn, *args) for tokn in t]

    try:
        # the class name serves as the fallback when func has no __name__
        fallbackName = getattr(func, '__class__').__name__
        actionName = getattr(func, '__name__', fallbackName)
    except Exception:
        actionName = str(func)
    pa.__name__ = actionName

    return pa