def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
Each line is a list of pairs, each pair is a token::
[('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]
Each pair has a token class, and the token text.
If you concatenate all the token texts, and then join them with newlines,
you should have your original `source` back, with two differences:
trailing whitespace is not preserved, and a final line with no newline
is indistinguishable from a final line with a newline.
"""
ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
line = []
col = 0
source = source.expandtabs(8).replace('\r\n', '\n')
tokgen = tokenize.generate_tokens(StringIO(source).readline)
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
if part == '\n':
yield line
line = []
col = 0
mark_end = False
elif part == '':
mark_end = False
elif ttype in ws_tokens:
mark_end = False
else:
if mark_start and scol > col:
line.append(("ws", " " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
tok_class = "key"
line.append((tok_class, part))
mark_end = True
scol = 0
if mark_end:
col = ecol
if line:
yield line
评论列表
文章目录