def __call__(self, toktype, toktext, start_pos, end_pos, line):
""" Token handler, with syntax highlighting."""
(srow,scol) = start_pos
(erow,ecol) = end_pos
colors = self.colors
owrite = self.out.write
# line separator, so this works across platforms
linesep = os.linesep
# calculate new positions
oldpos = self.pos
newpos = self.lines[srow] + scol
self.pos = newpos + len(toktext)
# send the original whitespace, if needed
if newpos > oldpos:
owrite(self.raw[oldpos:newpos])
# skip indenting tokens
if toktype in [token.INDENT, token.DEDENT]:
self.pos = newpos
return
# map token type to a color group
if token.LPAR <= toktype <= token.OP:
toktype = token.OP
elif toktype == token.NAME and keyword.iskeyword(toktext):
toktype = _KEYWORD
color = colors.get(toktype, colors[_TEXT])
#print '<%s>' % toktext, # dbg
# Triple quoted strings must be handled carefully so that backtracking
# in pagers works correctly. We need color terminators on _each_ line.
if linesep in toktext:
toktext = toktext.replace(linesep, '%s%s%s' %
(colors.normal,linesep,color))
# send text
owrite('%s%s%s' % (color,toktext,colors.normal))
python类INDENT的实例源码
def _get_indentword(source):
"""Return indentation type."""
indent_word = ' ' # Default in case source has no indentation
try:
for t in generate_tokens(source):
if t[0] == token.INDENT:
indent_word = t[1]
break
except (SyntaxError, tokenize.TokenError):
pass
return indent_word
def _parse_tokens(tokens):
"""Parse the tokens.
This converts the tokens into a form where we can manipulate them
more easily.
"""
index = 0
parsed_tokens = []
num_tokens = len(tokens)
while index < num_tokens:
tok = Token(*tokens[index])
assert tok.token_type != token.INDENT
if tok.token_type == tokenize.NEWLINE:
# There's only one newline and it's at the end.
break
if tok.token_string in '([{':
(container, index) = _parse_container(tokens, index)
if not container:
return None
parsed_tokens.append(container)
else:
parsed_tokens.append(Atom(tok))
index += 1
return parsed_tokens
def multiline_string_lines(source, include_docstrings=False):
"""Return line numbers that are within multiline strings.
The line numbers are indexed at 1.
Docstrings are ignored.
"""
line_numbers = set()
previous_token_type = ''
try:
for t in generate_tokens(source):
token_type = t[0]
start_row = t[2][0]
end_row = t[3][0]
if token_type == tokenize.STRING and start_row != end_row:
if (
include_docstrings or
previous_token_type != tokenize.INDENT
):
# We increment by one since we want the contents of the
# string.
line_numbers |= set(range(1 + start_row, 1 + end_row))
previous_token_type = token_type
except (SyntaxError, tokenize.TokenError):
pass
return line_numbers
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
# there was no class docstring
self.__state = self.__waiting
def __call__(self, toktype, toktext, start_pos, end_pos, line):
""" Token handler, with syntax highlighting."""
(srow,scol) = start_pos
(erow,ecol) = end_pos
colors = self.colors
owrite = self.out.write
# line separator, so this works across platforms
linesep = os.linesep
# calculate new positions
oldpos = self.pos
newpos = self.lines[srow] + scol
self.pos = newpos + len(toktext)
# send the original whitespace, if needed
if newpos > oldpos:
owrite(self.raw[oldpos:newpos])
# skip indenting tokens
if toktype in [token.INDENT, token.DEDENT]:
self.pos = newpos
return
# map token type to a color group
if token.LPAR <= toktype <= token.OP:
toktype = token.OP
elif toktype == token.NAME and keyword.iskeyword(toktext):
toktype = _KEYWORD
color = colors.get(toktype, colors[_TEXT])
#print '<%s>' % toktext, # dbg
# Triple quoted strings must be handled carefully so that backtracking
# in pagers works correctly. We need color terminators on _each_ line.
if linesep in toktext:
toktext = toktext.replace(linesep, '%s%s%s' %
(colors.normal,linesep,color))
# send text
owrite('%s%s%s' % (color,toktext,colors.normal))
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
Each line is a list of pairs, each pair is a token::
[('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
Each pair has a token class, and the token text.
If you concatenate all the token texts, and then join them with newlines,
you should have your original `source` back, with two differences:
trailing whitespace is not preserved, and a final line with no newline
is indistinguishable from a final line with a newline.
"""
ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
line = []
col = 0
source = source.expandtabs(8).replace('\r\n', '\n')
tokgen = generate_tokens(source)
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
if part == '\n':
yield line
line = []
col = 0
mark_end = False
elif part == '':
mark_end = False
elif ttype in ws_tokens:
mark_end = False
else:
if mark_start and scol > col:
line.append(("ws", " " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
tok_class = "key"
line.append((tok_class, part))
mark_end = True
scol = 0
if mark_end:
col = ecol
if line:
yield line
def _reindent_stats(tokens):
"""Return list of (lineno, indentlevel) pairs.
One for each stmt and comment line. indentlevel is -1 for comment lines, as
a signal that tokenize doesn't know what to do about them; indeed, they're
our headache!
"""
find_stmt = 1 # Next token begins a fresh stmt?
level = 0 # Current indent level.
stats = []
for t in tokens:
token_type = t[0]
sline = t[2][0]
line = t[4]
if token_type == tokenize.NEWLINE:
# A program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
find_stmt = 1
elif token_type == tokenize.INDENT:
find_stmt = 1
level += 1
elif token_type == tokenize.DEDENT:
find_stmt = 1
level -= 1
elif token_type == tokenize.COMMENT:
if find_stmt:
stats.append((sline, -1))
# But we're still looking for a new stmt, so leave
# find_stmt alone.
elif token_type == tokenize.NL:
pass
elif find_stmt:
# This is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER.
find_stmt = 0
if line: # Not endmarker.
stats.append((sline, level))
return stats
def _reindent_stats(tokens):
"""Return list of (lineno, indentlevel) pairs.
One for each stmt and comment line. indentlevel is -1 for comment lines, as
a signal that tokenize doesn't know what to do about them; indeed, they're
our headache!
"""
find_stmt = 1 # Next token begins a fresh stmt?
level = 0 # Current indent level.
stats = []
for t in tokens:
token_type = t[0]
sline = t[2][0]
line = t[4]
if token_type == tokenize.NEWLINE:
# A program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
find_stmt = 1
elif token_type == tokenize.INDENT:
find_stmt = 1
level += 1
elif token_type == tokenize.DEDENT:
find_stmt = 1
level -= 1
elif token_type == tokenize.COMMENT:
if find_stmt:
stats.append((sline, -1))
# But we're still looking for a new stmt, so leave
# find_stmt alone.
elif token_type == tokenize.NL:
pass
elif find_stmt:
# This is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER.
find_stmt = 0
if line: # Not endmarker.
stats.append((sline, level))
return stats
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
Each line is a list of pairs, each pair is a token::
[('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
Each pair has a token class, and the token text.
If you concatenate all the token texts, and then join them with newlines,
you should have your original `source` back, with two differences:
trailing whitespace is not preserved, and a final line with no newline
is indistinguishable from a final line with a newline.
"""
ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
line = []
col = 0
source = source.expandtabs(8).replace('\r\n', '\n')
tokgen = generate_tokens(source)
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
if part == '\n':
yield line
line = []
col = 0
mark_end = False
elif part == '':
mark_end = False
elif ttype in ws_tokens:
mark_end = False
else:
if mark_start and scol > col:
line.append(("ws", " " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
tok_class = "key"
line.append((tok_class, part))
mark_end = True
scol = 0
if mark_end:
col = ecol
if line:
yield line
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
Each line is a list of pairs, each pair is a token::
[('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
Each pair has a token class, and the token text.
If you concatenate all the token texts, and then join them with newlines,
you should have your original `source` back, with two differences:
trailing whitespace is not preserved, and a final line with no newline
is indistinguishable from a final line with a newline.
"""
ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
line = []
col = 0
source = source.expandtabs(8).replace('\r\n', '\n')
tokgen = tokenize.generate_tokens(StringIO(source).readline)
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
if part == '\n':
yield line
line = []
col = 0
mark_end = False
elif part == '':
mark_end = False
elif ttype in ws_tokens:
mark_end = False
else:
if mark_start and scol > col:
line.append(("ws", " " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
tok_class = "key"
line.append((tok_class, part))
mark_end = True
scol = 0
if mark_end:
col = ecol
if line:
yield line
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
Each line is a list of pairs, each pair is a token::
[('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
Each pair has a token class, and the token text.
If you concatenate all the token texts, and then join them with newlines,
you should have your original `source` back, with two differences:
trailing whitespace is not preserved, and a final line with no newline
is indistinguishable from a final line with a newline.
"""
ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
line = []
col = 0
source = source.expandtabs(8).replace('\r\n', '\n')
tokgen = tokenize.generate_tokens(StringIO(source).readline)
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
if part == '\n':
yield line
line = []
col = 0
mark_end = False
elif part == '':
mark_end = False
elif ttype in ws_tokens:
mark_end = False
else:
if mark_start and scol > col:
line.append(("ws", " " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
tok_class = "key"
line.append((tok_class, part))
mark_end = True
scol = 0
if mark_end:
col = ecol
if line:
yield line
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
Each line is a list of pairs, each pair is a token::
[('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
Each pair has a token class, and the token text.
If you concatenate all the token texts, and then join them with newlines,
you should have your original `source` back, with two differences:
trailing whitespace is not preserved, and a final line with no newline
is indistinguishable from a final line with a newline.
"""
ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
line = []
col = 0
source = source.expandtabs(8).replace('\r\n', '\n')
tokgen = tokenize.generate_tokens(StringIO(source).readline)
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
if part == '\n':
yield line
line = []
col = 0
mark_end = False
elif part == '':
mark_end = False
elif ttype in ws_tokens:
mark_end = False
else:
if mark_start and scol > col:
line.append(("ws", " " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
tok_class = "key"
line.append((tok_class, part))
mark_end = True
scol = 0
if mark_end:
col = ecol
if line:
yield line
def _simulate_compile_singlemode(self, s):
# Calculate line offsets
lines = [0, 0]
pos = 0
while 1:
pos = s.find('\n', pos)+1
if not pos: break
lines.append(pos)
lines.append(len(s))
oldpos = 0
parenlevel = 0
deflevel = 0
output = []
stmt = []
text = StringIO(s)
tok_gen = tokenize.generate_tokens(text.readline)
for toktype, tok, (srow,scol), (erow,ecol), line in tok_gen:
newpos = lines[srow] + scol
stmt.append(s[oldpos:newpos])
if tok != '':
stmt.append(tok)
oldpos = newpos + len(tok)
# Update the paren level.
if tok in '([{':
parenlevel += 1
if tok in '}])':
parenlevel -= 1
if tok in ('def', 'class') and deflevel == 0:
deflevel = 1
if deflevel and toktype == token.INDENT:
deflevel += 1
if deflevel and toktype == token.DEDENT:
deflevel -= 1
# Are we starting a statement?
if ((toktype in (token.NEWLINE, tokenize.NL, tokenize.COMMENT,
token.INDENT, token.ENDMARKER) or
tok==':') and parenlevel == 0):
if deflevel == 0 and self._is_expr(stmt[1:-2]):
output += stmt[0]
output.append('__print__((')
output += stmt[1:-2]
output.append('))')
output += stmt[-2:]
else:
output += stmt
stmt = []
return ''.join(output)
def _reindent_stats(tokens):
"""Return list of (lineno, indentlevel) pairs.
One for each stmt and comment line. indentlevel is -1 for comment lines, as
a signal that tokenize doesn't know what to do about them; indeed, they're
our headache!
"""
find_stmt = 1 # Next token begins a fresh stmt?
level = 0 # Current indent level.
stats = []
for t in tokens:
token_type = t[0]
sline = t[2][0]
line = t[4]
if token_type == tokenize.NEWLINE:
# A program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
find_stmt = 1
elif token_type == tokenize.INDENT:
find_stmt = 1
level += 1
elif token_type == tokenize.DEDENT:
find_stmt = 1
level -= 1
elif token_type == tokenize.COMMENT:
if find_stmt:
stats.append((sline, -1))
# But we're still looking for a new stmt, so leave
# find_stmt alone.
elif token_type == tokenize.NL:
pass
elif find_stmt:
# This is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER.
find_stmt = 0
if line: # Not endmarker.
stats.append((sline, level))
return stats
def _simulate_compile_singlemode(self, s):
# Calculate line offsets
lines = [0, 0]
pos = 0
while 1:
pos = s.find('\n', pos)+1
if not pos: break
lines.append(pos)
lines.append(len(s))
oldpos = 0
parenlevel = 0
deflevel = 0
output = []
stmt = []
text = StringIO(s)
tok_gen = tokenize.generate_tokens(text.readline)
for toktype, tok, (srow,scol), (erow,ecol), line in tok_gen:
newpos = lines[srow] + scol
stmt.append(s[oldpos:newpos])
if tok != '':
stmt.append(tok)
oldpos = newpos + len(tok)
# Update the paren level.
if tok in '([{':
parenlevel += 1
if tok in '}])':
parenlevel -= 1
if tok in ('def', 'class') and deflevel == 0:
deflevel = 1
if deflevel and toktype == token.INDENT:
deflevel += 1
if deflevel and toktype == token.DEDENT:
deflevel -= 1
# Are we starting a statement?
if ((toktype in (token.NEWLINE, tokenize.NL, tokenize.COMMENT,
token.INDENT, token.ENDMARKER) or
tok==':') and parenlevel == 0):
if deflevel == 0 and self._is_expr(stmt[1:-2]):
output += stmt[0]
output.append('__print__((')
output += stmt[1:-2]
output.append('))')
output += stmt[-2:]
else:
output += stmt
stmt = []
return ''.join(output)
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
Each line is a list of pairs, each pair is a token::
[('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
Each pair has a token class, and the token text.
If you concatenate all the token texts, and then join them with newlines,
you should have your original `source` back, with two differences:
trailing whitespace is not preserved, and a final line with no newline
is indistinguishable from a final line with a newline.
"""
ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
line = []
col = 0
# The \f is because of http://bugs.python.org/issue19035
source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
tokgen = generate_tokens(source)
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
if part == '\n':
yield line
line = []
col = 0
mark_end = False
elif part == '':
mark_end = False
elif ttype in ws_tokens:
mark_end = False
else:
if mark_start and scol > col:
line.append(("ws", u" " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
tok_class = "key"
line.append((tok_class, part))
mark_end = True
scol = 0
if mark_end:
col = ecol
if line:
yield line
def _simulate_compile_singlemode(self, s):
# Calculate line offsets
lines = [0, 0]
pos = 0
while 1:
pos = s.find('\n', pos)+1
if not pos: break
lines.append(pos)
lines.append(len(s))
oldpos = 0
parenlevel = 0
deflevel = 0
output = []
stmt = []
text = StringIO(s)
tok_gen = tokenize.generate_tokens(text.readline)
for toktype, tok, (srow,scol), (erow,ecol), line in tok_gen:
newpos = lines[srow] + scol
stmt.append(s[oldpos:newpos])
if tok != '':
stmt.append(tok)
oldpos = newpos + len(tok)
# Update the paren level.
if tok in '([{':
parenlevel += 1
if tok in '}])':
parenlevel -= 1
if tok in ('def', 'class') and deflevel == 0:
deflevel = 1
if deflevel and toktype == token.INDENT:
deflevel += 1
if deflevel and toktype == token.DEDENT:
deflevel -= 1
# Are we starting a statement?
if ((toktype in (token.NEWLINE, tokenize.NL, tokenize.COMMENT,
token.INDENT, token.ENDMARKER) or
tok==':') and parenlevel == 0):
if deflevel == 0 and self._is_expr(stmt[1:-2]):
output += stmt[0]
output.append('__print__((')
output += stmt[1:-2]
output.append('))')
output += stmt[-2:]
else:
output += stmt
stmt = []
return ''.join(output)