# working_with_tokens.py
import itertools
import re


def group_tokens(tokens):
"""
    Join and split tokens to make them more suitable for diffing.

    Transformations:
    - Empty tokens are removed
    - Text containing newlines is split so that each newline is its own token
    - Other sequential whitespace tokens are joined
    - Token types which contain freeform text (i.e. comments, strings) are
      split into words

    See the usage sketch at the bottom of this module for an example.
"""
for token_type, group in itertools.groupby(tokens, get_token_type):
if any(token_type in type_set for type_set in JOIN_TOKENS):
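            # Collapse the whole run of same-typed tokens into one joined token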
text = ''.join(get_token_text(token) for token in group)
group = [(token_type, text)]
if any(token_type in type_set for type_set in WORD_TOKENS):
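            # Freeform text (comments, strings) diffs better word-by-word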
group = (
(token_type, word)
for token in group
for word in split_words(get_token_text(token))
)
# Split by newlines
for token in group:
text_parts = re.split(r'(\n)', get_token_text(token))
for text_part in text_parts:
# Empty tokens are discarded, to avoid confusing
# difflib or highlighting empty regions
if text_part:
yield (token_type, text_part)
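
# ---------------------------------------------------------------------------
# A minimal usage sketch. The helpers and constants below (get_token_type,
# get_token_text, split_words, JOIN_TOKENS, WORD_TOKENS) are illustrative
# assumptions standing in for the real definitions elsewhere in this module;
# the actual token types and splitting rules may differ.
# ---------------------------------------------------------------------------

# Assumption: a token is a (token_type, text) pair.
def get_token_type(token):
    return token[0]


def get_token_text(token):
    return token[1]


# Assumption: runs of whitespace tokens are joined; comment and string
# tokens carry freeform text and are split into words.
JOIN_TOKENS = [{'Whitespace'}]
WORD_TOKENS = [{'Comment', 'String'}]


def split_words(text):
    # Keep the whitespace separators so no characters are lost in the split.
    return [part for part in re.split(r'(\s+)', text) if part]


if __name__ == '__main__':
    tokens = [
        ('Whitespace', ' '),
        ('Whitespace', '  '),
        ('Comment', '# two words\n'),
        ('Name', 'x'),
    ]
    for token in group_tokens(tokens):
        print(token)
    # Prints (under the assumptions above):
    #   ('Whitespace', '   ')   <- the whitespace run joined into one token
    #   ('Comment', '#'), ('Comment', ' '), ('Comment', 'two'),
    #   ('Comment', ' '), ('Comment', 'words'),
    #   ('Comment', '\n')       <- the newline isolated as its own token
    #   ('Name', 'x')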