def get(self, key, defaultValue=None):
"""
Returns named result matching the given key, or if there is no
such name, then returns the given C{defaultValue} or C{None} if no
C{defaultValue} is specified.
Similar to C{dict.get()}.
Example::
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
result = date_str.parseString("1999/12/31")
print(result.get("year")) # -> '1999'
print(result.get("hour", "not specified")) # -> 'not specified'
print(result.get("hour")) # -> None
"""
if key in self:
return self[key]
else:
return defaultValue
python类Word()的实例源码
def insert( self, index, insStr ):
"""
Inserts new element at location index in the list of parsed tokens.
Similar to C{list.insert()}.
Example::
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
# use a parse action to insert the parse location in the front of the parsed results
def insert_locn(locn, tokens):
tokens.insert(0, locn)
print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
"""
self.__toklist.insert(index, insStr)
# fixup indices in token dictionary
for name,occurrences in self.__tokdict.items():
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def extend( self, itemseq ):
"""
Add sequence of elements to end of ParseResults list of elements.
Example::
patt = OneOrMore(Word(alphas))
# use a parse action to append the reverse of the matched strings, to make a palindrome
def make_palindrome(tokens):
tokens.extend(reversed([t[::-1] for t in tokens]))
return ''.join(tokens)
print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
"""
if isinstance(itemseq, ParseResults):
self += itemseq
else:
self.__toklist.extend(itemseq)
def pprint(self, *args, **kwargs):
"""
Pretty-printer for parsed results as a list, using the C{pprint} module.
Accepts additional positional or keyword args as defined for the
C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
Example::
ident = Word(alphas, alphanums)
num = Word(nums)
func = Forward()
term = ident | num | Group('(' + func + ')')
func <<= ident + Group(Optional(delimitedList(term)))
result = func.parseString("fna a,b,(fnb c,d,200),100")
result.pprint(width=40)
prints::
['fna',
['a',
'b',
['(', 'fnb', ['c', 'd', '200'], ')'],
'100']]
"""
pprint.pprint(self.asList(), *args, **kwargs)
# add support for pickle protocol
def copy( self ):
"""
Make a copy of this C{ParserElement}. Useful for defining different parse actions
for the same parsing pattern, using copies of the original parse element.
Example::
integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
prints::
[5120, 100, 655360, 268435456]
Equivalent form of C{expr.copy()} is just C{expr()}::
integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
"""
cpy = copy.copy( self )
cpy.parseAction = self.parseAction[:]
cpy.ignoreExprs = self.ignoreExprs[:]
if self.copyDefaultWhiteChars:
cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
return cpy
def searchString( self, instring, maxMatches=_MAX_INT ):
"""
Another extension to C{L{scanString}}, simplifying the access to the tokens found
to match the given parse expression. May be called with optional
C{maxMatches} argument, to clip searching after 'n' matches are found.
Example::
# a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
cap_word = Word(alphas.upper(), alphas.lower())
print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
prints::
['More', 'Iron', 'Lead', 'Gold', 'I']
"""
try:
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
except ParseBaseException as exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
raise exc
def __add__(self, other ):
"""
Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
converts them to L{Literal}s by default.
Example::
greet = Word(alphas) + "," + Word(alphas) + "!"
hello = "Hello, World!"
print (hello, "->", greet.parseString(hello))
Prints::
Hello, World! -> ['Hello', ',', 'World', '!']
"""
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
return None
return And( [ self, other ] )
def __call__(self, name=None):
"""
Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
passed as C{True}.
If C{name} is omitted, same as calling C{L{copy}}.
Example::
# these are equivalent
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
"""
if name is not None:
return self.setResultsName(name)
else:
return self.copy()
def ignore( self, other ):
"""
Define expression to be ignored (e.g., comments) while doing pattern
matching; may be called repeatedly, to define multiple comment or other
ignorable patterns.
Example::
patt = OneOrMore(Word(alphas))
patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
patt.ignore(cStyleComment)
patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
"""
if isinstance(other, basestring):
other = Suppress(other)
if isinstance( other, Suppress ):
if other not in self.ignoreExprs:
self.ignoreExprs.append(other)
else:
self.ignoreExprs.append( Suppress( other.copy() ) )
return self
def __str__( self ):
try:
return super(Word,self).__str__()
except Exception:
pass
if self.strRepr is None:
def charsAsStr(s):
if len(s)>4:
return s[:4]+"..."
else:
return s
if ( self.initCharsOrig != self.bodyCharsOrig ):
self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
else:
self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
return self.strRepr
def delimitedList( expr, delim=",", combine=False ):
"""
Helper to define a delimited list of expressions - the delimiter defaults to ','.
By default, the list elements and delimiters can have intervening whitespace, and
comments, but this can be overridden by passing C{combine=True} in the constructor.
If C{combine} is set to C{True}, the matching tokens are returned as a single token
string, with the delimiters included; otherwise, the matching tokens are returned
as a list of tokens, with the delimiters suppressed.
Example::
delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
"""
dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
if combine:
return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
else:
return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def locatedExpr(expr):
"""
Helper to decorate a returned token with its starting and ending locations in the input string.
This helper adds the following results names:
- locn_start = location where matched expression begins
- locn_end = location where matched expression ends
- value = the actual parsed results
Be careful if the input text contains C{<TAB>} characters, you may want to call
C{L{ParserElement.parseWithTabs}}
Example::
wd = Word(alphas)
for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
print(match)
prints::
[[0, 'ljsdf', 5]]
[[8, 'lksdjjf', 15]]
[[18, 'lkkjj', 23]]
"""
locator = Empty().setParseAction(lambda s,l,t: l)
return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
# convenience constants for positional expressions
def srange(s):
r"""
Helper to easily define string ranges for use in Word construction. Borrows
syntax from regexp '[]' string range definitions::
srange("[0-9]") -> "0123456789"
srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
The input string must be enclosed in []'s, and the returned string is the expanded
character set joined into a single string.
The values enclosed in the []'s may be:
- a single character
- an escaped character with a leading backslash (such as C{\-} or C{\]})
- an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
(C{\0x##} is also supported for backwards compatibility)
- an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
- a range of any of the above, separated by a dash (C{'a-z'}, etc.)
- any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
"""
_expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
try:
return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
except Exception:
return ""
def get(self, key, defaultValue=None):
"""
Returns named result matching the given key, or if there is no
such name, then returns the given C{defaultValue} or C{None} if no
C{defaultValue} is specified.
Similar to C{dict.get()}.
Example::
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
result = date_str.parseString("1999/12/31")
print(result.get("year")) # -> '1999'
print(result.get("hour", "not specified")) # -> 'not specified'
print(result.get("hour")) # -> None
"""
if key in self:
return self[key]
else:
return defaultValue
def insert( self, index, insStr ):
"""
Inserts new element at location index in the list of parsed tokens.
Similar to C{list.insert()}.
Example::
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
# use a parse action to insert the parse location in the front of the parsed results
def insert_locn(locn, tokens):
tokens.insert(0, locn)
print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
"""
self.__toklist.insert(index, insStr)
# fixup indices in token dictionary
for name,occurrences in self.__tokdict.items():
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def extend( self, itemseq ):
"""
Add sequence of elements to end of ParseResults list of elements.
Example::
patt = OneOrMore(Word(alphas))
# use a parse action to append the reverse of the matched strings, to make a palindrome
def make_palindrome(tokens):
tokens.extend(reversed([t[::-1] for t in tokens]))
return ''.join(tokens)
print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
"""
if isinstance(itemseq, ParseResults):
self += itemseq
else:
self.__toklist.extend(itemseq)
def pprint(self, *args, **kwargs):
"""
Pretty-printer for parsed results as a list, using the C{pprint} module.
Accepts additional positional or keyword args as defined for the
C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
Example::
ident = Word(alphas, alphanums)
num = Word(nums)
func = Forward()
term = ident | num | Group('(' + func + ')')
func <<= ident + Group(Optional(delimitedList(term)))
result = func.parseString("fna a,b,(fnb c,d,200),100")
result.pprint(width=40)
prints::
['fna',
['a',
'b',
['(', 'fnb', ['c', 'd', '200'], ')'],
'100']]
"""
pprint.pprint(self.asList(), *args, **kwargs)
# add support for pickle protocol
def copy( self ):
"""
Make a copy of this C{ParserElement}. Useful for defining different parse actions
for the same parsing pattern, using copies of the original parse element.
Example::
integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
prints::
[5120, 100, 655360, 268435456]
Equivalent form of C{expr.copy()} is just C{expr()}::
integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
"""
cpy = copy.copy( self )
cpy.parseAction = self.parseAction[:]
cpy.ignoreExprs = self.ignoreExprs[:]
if self.copyDefaultWhiteChars:
cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
return cpy
def searchString( self, instring, maxMatches=_MAX_INT ):
"""
Another extension to C{L{scanString}}, simplifying the access to the tokens found
to match the given parse expression. May be called with optional
C{maxMatches} argument, to clip searching after 'n' matches are found.
Example::
# a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
cap_word = Word(alphas.upper(), alphas.lower())
print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
prints::
['More', 'Iron', 'Lead', 'Gold', 'I']
"""
try:
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
except ParseBaseException as exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
raise exc
def __add__(self, other ):
"""
Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
converts them to L{Literal}s by default.
Example::
greet = Word(alphas) + "," + Word(alphas) + "!"
hello = "Hello, World!"
print (hello, "->", greet.parseString(hello))
Prints::
Hello, World! -> ['Hello', ',', 'World', '!']
"""
if isinstance( other, basestring ):
other = ParserElement._literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
return None
return And( [ self, other ] )
def __call__(self, name=None):
"""
Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
passed as C{True}.
If C{name} is omitted, same as calling C{L{copy}}.
Example::
# these are equivalent
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
"""
if name is not None:
return self.setResultsName(name)
else:
return self.copy()
def ignore( self, other ):
"""
Define expression to be ignored (e.g., comments) while doing pattern
matching; may be called repeatedly, to define multiple comment or other
ignorable patterns.
Example::
patt = OneOrMore(Word(alphas))
patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
patt.ignore(cStyleComment)
patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
"""
if isinstance(other, basestring):
other = Suppress(other)
if isinstance( other, Suppress ):
if other not in self.ignoreExprs:
self.ignoreExprs.append(other)
else:
self.ignoreExprs.append( Suppress( other.copy() ) )
return self
def __str__( self ):
try:
return super(Word,self).__str__()
except Exception:
pass
if self.strRepr is None:
def charsAsStr(s):
if len(s)>4:
return s[:4]+"..."
else:
return s
if ( self.initCharsOrig != self.bodyCharsOrig ):
self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
else:
self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
return self.strRepr
def delimitedList( expr, delim=",", combine=False ):
"""
Helper to define a delimited list of expressions - the delimiter defaults to ','.
By default, the list elements and delimiters can have intervening whitespace, and
comments, but this can be overridden by passing C{combine=True} in the constructor.
If C{combine} is set to C{True}, the matching tokens are returned as a single token
string, with the delimiters included; otherwise, the matching tokens are returned
as a list of tokens, with the delimiters suppressed.
Example::
delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
"""
dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
if combine:
return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
else:
return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def locatedExpr(expr):
"""
Helper to decorate a returned token with its starting and ending locations in the input string.
This helper adds the following results names:
- locn_start = location where matched expression begins
- locn_end = location where matched expression ends
- value = the actual parsed results
Be careful if the input text contains C{<TAB>} characters, you may want to call
C{L{ParserElement.parseWithTabs}}
Example::
wd = Word(alphas)
for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
print(match)
prints::
[[0, 'ljsdf', 5]]
[[8, 'lksdjjf', 15]]
[[18, 'lkkjj', 23]]
"""
locator = Empty().setParseAction(lambda s,l,t: l)
return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
# convenience constants for positional expressions
def srange(s):
r"""
Helper to easily define string ranges for use in Word construction. Borrows
syntax from regexp '[]' string range definitions::
srange("[0-9]") -> "0123456789"
srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
The input string must be enclosed in []'s, and the returned string is the expanded
character set joined into a single string.
The values enclosed in the []'s may be:
- a single character
- an escaped character with a leading backslash (such as C{\-} or C{\]})
- an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
(C{\0x##} is also supported for backwards compatibility)
- an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
- a range of any of the above, separated by a dash (C{'a-z'}, etc.)
- any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
"""
_expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
try:
return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
except Exception:
return ""
def get(self, key, defaultValue=None):
"""
Returns named result matching the given key, or if there is no
such name, then returns the given C{defaultValue} or C{None} if no
C{defaultValue} is specified.
Similar to C{dict.get()}.
Example::
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
result = date_str.parseString("1999/12/31")
print(result.get("year")) # -> '1999'
print(result.get("hour", "not specified")) # -> 'not specified'
print(result.get("hour")) # -> None
"""
if key in self:
return self[key]
else:
return defaultValue
def insert( self, index, insStr ):
"""
Inserts new element at location index in the list of parsed tokens.
Similar to C{list.insert()}.
Example::
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
# use a parse action to insert the parse location in the front of the parsed results
def insert_locn(locn, tokens):
tokens.insert(0, locn)
print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
"""
self.__toklist.insert(index, insStr)
# fixup indices in token dictionary
for name,occurrences in self.__tokdict.items():
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def extend( self, itemseq ):
"""
Add sequence of elements to end of ParseResults list of elements.
Example::
patt = OneOrMore(Word(alphas))
# use a parse action to append the reverse of the matched strings, to make a palindrome
def make_palindrome(tokens):
tokens.extend(reversed([t[::-1] for t in tokens]))
return ''.join(tokens)
print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
"""
if isinstance(itemseq, ParseResults):
self += itemseq
else:
self.__toklist.extend(itemseq)
def pprint(self, *args, **kwargs):
"""
Pretty-printer for parsed results as a list, using the C{pprint} module.
Accepts additional positional or keyword args as defined for the
C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
Example::
ident = Word(alphas, alphanums)
num = Word(nums)
func = Forward()
term = ident | num | Group('(' + func + ')')
func <<= ident + Group(Optional(delimitedList(term)))
result = func.parseString("fna a,b,(fnb c,d,200),100")
result.pprint(width=40)
prints::
['fna',
['a',
'b',
['(', 'fnb', ['c', 'd', '200'], ')'],
'100']]
"""
pprint.pprint(self.asList(), *args, **kwargs)
# add support for pickle protocol