def _find_logical(source_lines):
    # Record the (row, col) positions where each logical line starts and ends.
logical_start = []
logical_end = []
last_newline = True
parens = 0
for t in generate_tokens(''.join(source_lines)):
if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
tokenize.INDENT, tokenize.NL,
tokenize.ENDMARKER]:
continue
if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
last_newline = True
logical_end.append((t[3][0] - 1, t[2][1]))
continue
if last_newline and not parens:
logical_start.append((t[2][0] - 1, t[2][1]))
last_newline = False
if t[0] == tokenize.OP:
if t[1] in '([{':
parens += 1
elif t[1] in '}])':
parens -= 1
return (logical_start, logical_end)
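# tokenize.generate_tokens() takes a readline callable, not a string, so the
# snippet above assumes a wrapper in the style of autopep8's helper; a
# minimal sketch of that helper plus a usage example:
import io
import tokenize
def generate_tokens(source):
    # Tokenize a whole source string via a StringIO readline.
    return list(tokenize.generate_tokens(io.StringIO(source).readline))
source_lines = ['x = (1 +\n', '     2)\n', 'y = 3\n']
starts, ends = _find_logical(source_lines)
print(starts)  # [(0, 0), (2, 0)] -- the parenthesized pair is one logical line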
def Subst(string, maker):
return OP(string) >> (lambda t: lambda ctx: maker(lineno=t.start[0], col_offset=t.start[1]))
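# Subst assumes an OP parser combinator in scope; a sketch of one consistent
# with the usage above (hypothetical reconstruction, assuming funcparserlib
# over tokenize.TokenInfo tokens):
import token
from funcparserlib.parser import some
def OP(string):
    # Match a single operator token with the given text; the parse result is
    # the TokenInfo itself, so Subst can read t.start.
    return some(lambda tok: tok.type == token.OP and tok.string == string)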
def _parse_parameters(self):
""" Parses parameter list """
# Check and skip over '('
t = self._next_token()
if t.type != TokenType.OP or t.value != '(':
raise ParseError("Expected '(' of parameter list, got '%s'" % (t.value,))
parameters = []
while self._tokens_left():
# Check for ')' that would end parameter list
t = self._peek_token()
if t.type == TokenType.OP and t.value == ')':
self._next_token()
return parameters
# Parse one parameter
parameters.append(self._parse_parameter())
# Check if next token is either ')' or ','
t = self._peek_token()
while t.type == TokenType.NEWLINE or t.value == "\n":
self._next_token()
if not self._tokens_left():
raise ParseError("Expected ',' or end of parameter list after parameter '%s'" % (parameters[-1],))
t = self._peek_token()
if t.type == TokenType.OP and t.value == ')':
pass
elif t.type == TokenType.OP and t.value == ',':
self._next_token()
else:
raise ParseError("Expected ',' or end of parameter list after parameter '%s'" % (parameters[-1],))
    # Code shouldn't reach here unless there is no closing ')' in the parameter list.
raise ParseError("Unmatched parenthesis")
def _expand_to_matching_pairs(self, first_token, last_token, node):
"""
Scan tokens in [first_token, last_token] range that are between node's children, and for any
unmatched brackets, adjust first/last tokens to include the closing pair.
"""
# We look for opening parens/braces among non-child tokens (i.e. tokens between our actual
# child nodes). If we find any closing ones, we match them to the opens.
to_match_right = []
to_match_left = []
for tok in self._iter_non_child_tokens(first_token, last_token, node):
tok_info = tok[:2]
if to_match_right and tok_info == to_match_right[-1]:
to_match_right.pop()
elif tok_info in _matching_pairs_left:
to_match_right.append(_matching_pairs_left[tok_info])
elif tok_info in _matching_pairs_right:
to_match_left.append(_matching_pairs_right[tok_info])
# Once done, extend `last_token` to match any unclosed parens/braces.
for match in reversed(to_match_right):
last = self._code.next_token(last_token)
# Allow for a trailing comma before the closing delimiter.
if util.match_token(last, token.OP, ','):
last = self._code.next_token(last)
# Now check for the actual closing delimiter.
if util.match_token(last, *match):
last_token = last
# And extend `first_token` to match any unclosed opening parens/braces.
for match in to_match_left:
first = self._code.prev_token(first_token)
if util.match_token(first, *match):
first_token = first
return (first_token, last_token)
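# The two lookup tables assumed above map an opening delimiter token to its
# closing counterpart and vice versa; a sketch consistent with the usage:
import token
_matching_pairs_left = {
    (token.OP, '('): (token.OP, ')'),
    (token.OP, '['): (token.OP, ']'),
    (token.OP, '{'): (token.OP, '}'),
}
_matching_pairs_right = {v: k for k, v in _matching_pairs_left.items()}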
#----------------------------------------------------------------------
# Node visitors. Each takes a preliminary first and last tokens, and returns the adjusted pair
# that will actually be assigned.
def handle_comp(self, open_brace, node, first_token, last_token):
# For list/set/dict comprehensions, we only get the token of the first child, so adjust it to
# include the opening brace (the closing brace will be matched automatically).
before = self._code.prev_token(first_token)
util.expect_token(before, token.OP, open_brace)
return (before, last_token)
def handle_attr(self, node, first_token, last_token):
# Attribute node has ".attr" (2 tokens) after the last child.
dot = self._code.find_token(last_token, token.OP, '.')
name = self._code.next_token(dot)
util.expect_token(name, token.NAME)
return (first_token, name)
def visit_call(self, node, first_token, last_token):
# A function call isn't over until we see a closing paren. Remember that last_token is at the
# end of all children, so we are not worried about encountering a paren that belongs to a
# child.
return (first_token, self._code.find_token(last_token, token.OP, ')'))
def visit_subscript(self, node, first_token, last_token):
    # A subscript operation isn't over until we see a closing bracket. Similar to function calls.
return (first_token, self._code.find_token(last_token, token.OP, ']'))
def visit_num(self, node, first_token, last_token):
# A constant like '-1' gets turned into two tokens; this will skip the '-'.
while util.match_token(last_token, token.OP):
last_token = self._code.next_token(last_token)
return (first_token, last_token)
# In Astroid, the Num and Str nodes are replaced by Const.
def visit_keyword(self, node, first_token, last_token):
if node.arg is not None:
equals = self._code.find_token(first_token, token.OP, '=', reverse=True)
name = self._code.prev_token(equals)
util.expect_token(name, token.NAME, node.arg)
first_token = name
return (first_token, last_token)
def visit_starred(self, node, first_token, last_token):
# Astroid has 'Starred' nodes (for "foo(*bar)" type args), but they need to be adjusted.
if not util.match_token(first_token, token.OP, '*'):
star = self._code.prev_token(first_token)
if util.match_token(star, token.OP, '*'):
first_token = star
return (first_token, last_token)
def visit_assignname(self, node, first_token, last_token):
# Astroid may turn 'except' clause into AssignName, but we need to adjust it.
if util.match_token(first_token, token.NAME, 'except'):
colon = self._code.find_token(last_token, token.OP, ':')
first_token = last_token = self._code.prev_token(colon)
return (first_token, last_token)
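# These visitors back asttokens' node-to-source-range mapping; a minimal
# usage sketch (requires the asttokens package):
import asttokens
atok = asttokens.ASTTokens('foo(*bar)[0]', parse=True)
node = atok.tree.body[0].value  # the Subscript node
print(atok.get_text(node))      # foo(*bar)[0]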
def __call__(self, toktype, toktext, start_pos, end_pos, line):
""" Token handler, with syntax highlighting."""
(srow,scol) = start_pos
(erow,ecol) = end_pos
colors = self.colors
owrite = self.out.write
# line separator, so this works across platforms
linesep = os.linesep
# calculate new positions
oldpos = self.pos
newpos = self.lines[srow] + scol
self.pos = newpos + len(toktext)
# send the original whitespace, if needed
if newpos > oldpos:
owrite(self.raw[oldpos:newpos])
# skip indenting tokens
if toktype in [token.INDENT, token.DEDENT]:
self.pos = newpos
return
# map token type to a color group
if token.LPAR <= toktype <= token.OP:
toktype = token.OP
elif toktype == token.NAME and keyword.iskeyword(toktext):
toktype = _KEYWORD
color = colors.get(toktype, colors[_TEXT])
#print '<%s>' % toktext, # dbg
# Triple quoted strings must be handled carefully so that backtracking
# in pagers works correctly. We need color terminators on _each_ line.
if linesep in toktext:
toktext = toktext.replace(linesep, '%s%s%s' %
(colors.normal,linesep,color))
# send text
owrite('%s%s%s' % (color,toktext,colors.normal))
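# The handler above follows tokenize's 5-tuple protocol; a driver sketch
# that feeds it a source string (the instance attributes self.raw,
# self.lines, self.pos, self.colors, and self.out are assumed to be set up
# by the surrounding class):
import io
import tokenize
def run_handler(source, handler):
    # Call handler(toktype, toktext, start_pos, end_pos, line) per token.
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        handler(tok.type, tok.string, tok.start, tok.end, tok.line)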
def fixLazyJson(in_text):
    # Rewrite the token stream of 'lazy' JSON (unquoted keys, single-quoted
    # strings, trailing commas) into strict JSON; expects ASCII bytes.
    tokengen = tokenize.generate_tokens(StringIO(in_text.decode('ascii')).readline)
    result = []
    for tokid, tokval, _, _, _ in tokengen:
        # fix unquoted strings
        if tokid == token.NAME:
            if tokval not in ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']:
                tokid = token.STRING
                tokval = u'"%s"' % tokval
        # fix single-quoted strings
        elif tokid == token.STRING:
            if tokval.startswith("'"):
                tokval = u'"%s"' % tokval[1:-1].replace('"', '\\"')
        # remove trailing commas before a closing '}' or ']'
        elif tokid == token.OP and tokval in ('}', ']'):
            if result and result[-1][1] == ',':
                result.pop()
        result.append((tokid, tokval))
    return tokenize.untokenize(result)
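# Usage sketch: run lazy JSON through the fixer, then parse the strict
# output (untokenize may add stray whitespace, which json.loads tolerates):
import json
strict = fixLazyJson(b"{foo: 'bar', items: [1, 2,],}")
print(json.loads(strict))  # {'foo': 'bar', 'items': [1, 2]}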
def op_(s):
return some(lambda tok: tok.type == token.OP and tok.string == s)
def s_(string):
return skip(some(lambda tok: tok.type == token.OP and tok.string == string))
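# op_/s_ are funcparserlib-style primitives over tokenize tokens: op_ yields
# the matched operator token, while s_ matches one and drops it from the
# result. A usage sketch (assumes the funcparserlib package):
import io
import token
import tokenize
from funcparserlib.parser import many
def _op_tokens(source):
    # Keep only the operator tokens from the tokenized source.
    return [t for t in tokenize.generate_tokens(io.StringIO(source).readline)
            if t.type == token.OP]
plusses = s_('(') + many(op_('+')) + s_(')')
print(len(plusses.parse(_op_tokens('(+++)'))))  # 3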
def auto_positive_symbol(tokens, local_dict, global_dict):
"""
Inserts calls to ``Symbol`` for undefined variables.
Passes in positive=True as a keyword argument.
    Adapted from sympy.parsing.sympy_parser.auto_symbol
"""
result = []
prevTok = (None, None)
tokens.append((None, None)) # so zip traverses all tokens
for tok, nextTok in zip(tokens, tokens[1:]):
tokNum, tokVal = tok
nextTokNum, nextTokVal = nextTok
if tokNum == token.NAME:
name = tokVal
if (name in ['True', 'False', 'None']
or iskeyword(name)
or name in local_dict
# Don't convert attribute access
or (prevTok[0] == token.OP and prevTok[1] == '.')
# Don't convert keyword arguments
or (prevTok[0] == token.OP and prevTok[1] in ('(', ',')
and nextTokNum == token.OP and nextTokVal == '=')):
result.append((token.NAME, name))
continue
elif name in global_dict:
obj = global_dict[name]
if isinstance(obj, (Basic, type)) or callable(obj):
result.append((token.NAME, name))
continue
result.extend([
(token.NAME, 'Symbol'),
(token.OP, '('),
(token.NAME, repr(str(name))),
(token.OP, ','),
(token.NAME, 'positive'),
(token.OP, '='),
(token.NAME, 'True'),
(token.OP, ')'),
])
else:
result.append((tokNum, tokVal))
prevTok = (tokNum, tokVal)
return result
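# Usage sketch with sympy's parser hooks (the signature above matches what
# sympy.parsing.sympy_parser passes to each transformation):
from sympy.parsing.sympy_parser import parse_expr
expr = parse_expr('x + y', transformations=(auto_positive_symbol,))
print(all(s.is_positive for s in expr.free_symbols))  # True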