pyparsing.py 文件源码-python代码片段

def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns astring containing the original parsed text.  

    If the optional C{asString} argument is passed as C{False}, then the return value is a 
    C{L{ParseResults}} containing any results names that were originally matched, and a 
    single token containing the original matched text from the input string.  So if 
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr