def highlight(self, block):
    """Method called on each block to highlight its content."""
    tokens = pygments.lex(block, self.python_lexer)
    if self.format_rst:
        from pygments.token import Token
        toks = []
        for token in tokens:
            if token[0] == Token.String.Doc and len(token[1]) > 6:
                toks += pygments.lex(token[1][:3], self.python_lexer)
                # parse the docstring content with the reST lexer
                toks += pygments.lex(token[1][3:-3], self.rst_lexer)
                toks += pygments.lex(token[1][-3:], self.python_lexer)
            elif token[0] == Token.Comment.Single:
                toks.append((Token.Comment.Single, token[1][0]))
                # parse the comment content with the reST lexer and drop the
                # extra newline the reST lexer adds
                toks += list(pygments.lex(token[1][1:], self.rst_lexer))[:-1]
            else:
                toks.append(token)
        tokens = toks
    return pygments.format(tokens, self.formatter)
pygments.lex() usage examples
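As a baseline for the excerpts on this page, here is a minimal self-contained sketch of what pygments.lex() yields: (token_type, text) pairs that the snippets below post-process in different ways. The sample source string is purely illustrative.

import pygments
from pygments.lexers import PythonLexer

# lex() returns an iterator of (token_type, value) pairs; joining the values
# reproduces the input text exactly.
source = 'def add(a, b):\n    return a + b\n'
for ttype, value in pygments.lex(source, PythonLexer()):
    print(ttype, repr(value))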
def fix_preprocessor_defs(tokens, lexer):
    res = []
    for t in tokens:
        token_split = t[1].split()
        if not is_token_subtype(t[0], Token.Literal.String) and len(token_split) > 1:
            if t[0] == Token.Comment.PreprocFile:
                # Keep only the include target: "file.h", <file.h>, or the
                # first whitespace-delimited word.
                if t[1].startswith('"'):
                    end = t[1].find('"', t[1].find('"') + 1) + 1
                elif t[1].startswith('<'):
                    end = t[1].find('>') + 1
                else:
                    end = t[1].find(' ') + 1
                res.append((t[0], t[1][:end]))
            else:
                # Re-lex multi-word tokens so each word becomes its own token.
                token_lexed = list(lex(' '.join(token_split), lexer))
                res += token_lexed
        else:
            res.append(t)
    return res
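A hedged usage sketch for fix_preprocessor_defs() above, assuming the function is defined in a module that already has lex, Token, and is_token_subtype in scope; the C snippet and variable names here are illustrative only.

from pygments import lex
from pygments.lexers import CLexer
from pygments.token import Token

c_code = '#include <stdio.h>\n#define MAX 10\n'
raw_tokens = list(lex(c_code, CLexer()))
# fix_preprocessor_defs() re-lexes multi-word preprocessor tokens and trims
# Comment.PreprocFile tokens down to the include target, e.g. '<stdio.h>'.
fixed = fix_preprocessor_defs(raw_tokens, CLexer())
print([value for ttype, value in fixed if ttype == Token.Comment.PreprocFile])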
Source: pygments_code_block_directive.py (project: deviation-manual, author: DeviationTX)
def __iter__(self):
    """Parse the code string and yield "classified" tokens."""
    try:
        tokens = self.lex()
    except IOError:
        log.info("Pygments lexer not found, using fallback")
        # TODO: write message to INFO
        yield ('', self.code)
        return
    for ttype, value in self.join(tokens):
        yield (_get_ttype_class(ttype), value)
# code_block_directive
# --------------------
# ::
def __iter__(self):
    """Parse self.code and yield "classified" tokens."""
    if self.lexer is None:
        yield ([], self.code)
        return
    tokens = pygments.lex(self.code, self.lexer)
    for tokentype, value in self.merge(tokens):
        if self.tokennames == 'long':    # long CSS class args
            classes = str(tokentype).lower().split('.')
        else:                            # short CSS class args
            classes = [_get_ttype_class(tokentype)]
        classes = [cls for cls in classes if cls not in unstyled_tokens]
        yield (classes, value)
def tokenize_code(code, lexer, language, literal_option):
    tokens = lex(code, lexer)
    tokensList = list(tokens)
    # Strip comments and alter strings
    lexedWoComments = tokensExceptTokenType(tokensList, Token.Comment,
                                            retainedTypes=[Token.Comment.Preproc, Token.Comment.PreprocFile])
    lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Literal.String.Doc)
    # Alter the pygments lexer types to be more comparable between our languages
    lexedWoComments = fixTypes(lexedWoComments, language)
    lexedWoComments = convertNamespaceTokens(lexedWoComments, language)
    lexedWoComments = fix_preprocessor_defs(lexedWoComments, lexer)
    lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Comment,
                                            retainedTypes=[Token.Comment.Preproc, Token.Comment.PreprocFile])
    if literal_option == 0:
        lexedWoComments = modifyStrings(lexedWoComments, underscoreString)
    elif literal_option == 1:
        lexedWoComments = modifyStrings(lexedWoComments, singleStringToken)
    elif literal_option == 2:
        lexedWoComments = modifyStrings(lexedWoComments, spaceString)
    elif literal_option == 3:
        lexedWoComments = modifyStrings(lexedWoComments, singleStringToken)
        lexedWoComments = collapseStrings(lexedWoComments)
        lexedWoComments = modifyNumbers(lexedWoComments, singleNumberToken)
    return get_tokenization(lexedWoComments, lexer)
# source_file: path of source file to be tokenized
# language: programming language of source file, e.g. "c"
# literal_option:
# 0 -> replace all spaces in strings with _
# 1 -> replace all strings with a <str> tag
# 2 -> add spaces to the ends of the strings
# 3 -> collapse strings to <str> and collapses numbers to a type as well.
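The modifyStrings / collapseStrings helpers referenced above are not shown on this page, so here is a small self-contained sketch (with a hypothetical helper name) of the idea behind literal_option 1: collapsing every string literal into a single <str> placeholder token.

from pygments import lex
from pygments.lexers import CLexer
from pygments.token import Token

def abstract_string_literals(tokens):
    """Collapse each run of string-literal tokens into one <str> tag."""
    in_string = False
    for ttype, value in tokens:
        if ttype in Token.Literal.String:
            if not in_string:
                yield Token.Literal.String, '<str>'
                in_string = True
        else:
            in_string = False
            yield ttype, value

code = 'printf("hello world");\n'
print([value for _, value in abstract_string_literals(lex(code, CLexer()))])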
Source: pygments_code_block_directive.py (project: deviation-manual, author: DeviationTX)
def lex(self):
    # Get lexer for language (use text as fallback)
    try:
        if self.language and unicode(self.language).lower() != 'none':
            lexer = get_lexer_by_name(self.language.lower(),
                                      **self.custom_args)
        else:
            lexer = get_lexer_by_name('text', **self.custom_args)
    except ValueError:
        log.info("no pygments lexer for %s, using 'text'"
                 % self.language)
        # What happens if pygments isn't present?
        lexer = get_lexer_by_name('text')
    return pygments.lex(self.code, lexer)
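The lex() method above is Python 2 code (it relies on unicode and the directive's custom_args). A rough Python 3 sketch of the same fallback logic, with a hypothetical function name, could be:

import pygments
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound

def lex_with_fallback(code, language, **custom_args):
    """Lex `code` with the lexer named by `language`, falling back to plain text."""
    try:
        if language and str(language).lower() != 'none':
            lexer = get_lexer_by_name(language.lower(), **custom_args)
        else:
            lexer = get_lexer_by_name('text', **custom_args)
    except ClassNotFound:  # ClassNotFound is a ValueError subclass
        lexer = get_lexer_by_name('text')
    return pygments.lex(code, lexer)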
def format_testcase_diff(diff):
    """Format a testcase output diff.

    PARAMETERS
        diff: the diff content

    RETURNS
        a list of pygments' Tokens
    """
    def new_line_token():
        """Generate a new line token."""
        return Token.Whitespace, '\n'

    def indent_token():
        """Generate an indentation space token."""
        return Token.Whitespace, ' ' * 4

    tokens = []
    new_line = True
    # Because of logging prefixes, skip the first line to avoid
    # misalignment.
    tokens.append(new_line_token())
    for ttype, value in pygments.lex(diff, DiffLexer()):
        for subval in value.split('\n'):
            if new_line:
                tokens.append(indent_token())
            new_line = not subval
            if subval:
                tokens.append((ttype, subval))
            else:
                tokens.append(new_line_token())
    return tokens
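A self-contained sketch of how such a token list can be rendered; the sample diff and the choice of TerminalFormatter are illustrative, and pygments.lex() with DiffLexer stands in here for the output of format_testcase_diff().

import pygments
from pygments.lexers import DiffLexer
from pygments.formatters import TerminalFormatter

sample_diff = (
    '--- expected\n'
    '+++ actual\n'
    '@@ -1 +1 @@\n'
    '-hello\n'
    '+hello, world\n'
)
# Any iterable of (token_type, value) pairs can be fed to a pygments formatter.
print(pygments.format(pygments.lex(sample_diff, DiffLexer()), TerminalFormatter()))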
Source: code_analyzer.py (project: tf_aws_ecs_instance_draining_on_scale_in, author: terraform-community-modules)
def __iter__(self):
    """Parse self.code and yield "classified" tokens."""
    if self.lexer is None:
        yield ([], self.code)
        return
    tokens = pygments.lex(self.code, self.lexer)
    for tokentype, value in self.merge(tokens):
        if self.tokennames == 'long':    # long CSS class args
            classes = str(tokentype).lower().split('.')
        else:                            # short CSS class args
            classes = [_get_ttype_class(tokentype)]
        classes = [cls for cls in classes if cls not in unstyled_tokens]
        yield (classes, value)
def tokens(self, event=None):
    """Highlight tokens as lexed by Pygments.

    This seems to work only after the textarea has been updated, and calling
    update_idletasks has no effect. The problem can be worked around by
    re-calling the function when there is no bbox (as in update_linenumbers),
    or by figuring out what has not yet been updated when this function runs
    (bbox was the culprit in update_linenumbers).
    """
    # http://stackoverflow.com/a/30199105
    from pygments import lex, highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import HtmlFormatter

    # Don't use top_visible(): multiline strings can start before the visible
    # view and end inside it.
    #tv = self.mainframe.texthelper.top_visible(self.textarea)
    # Use bottom_visible(): highlighting still works if a multiline string is
    # not properly closed.
    bv = self.mainframe.texthelper.bottom_visible(self.textarea)
    data = self.textarea.get("1.0", bv)  # "end-1c"
    if data == self.prevdata:
        return

    self.clear_tokens()
    #print(highlight(data, PythonLexer(), HtmlFormatter()))
    prev_content = ''
    i = 0
    for token, content in lex(data, PythonLexer()):
        lencontent = len(content)
        # This happens sometimes on Lubuntu.
        if not content:
            #print('no content in HighLight.tokens() loop')
            continue
        #str(token) == 'Token.Literal.String.Doc'
        if self.mainframe.texthelper.visible(self.textarea, '1.0 + %dc' % i) \
                or self.mainframe.texthelper.visible(self.textarea, '1.0 + %dc' % (i + lencontent)):
            self.textarea.mark_set("range_start", "1.0 + %dc" % i)
            self.textarea.mark_set("range_end", "range_start + %dc" % lencontent)
            self.textarea.tag_add(str(token), "range_start", "range_end")
        i += lencontent
    self.prevdata = data
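The tokens() method above depends on application-specific helpers (mainframe.texthelper, clear_tokens). A minimal self-contained sketch of the underlying pattern from the linked Stack Overflow answer, tagging a Tkinter Text widget with Pygments token names, could look like this; the widget setup, sample code, and colors are illustrative.

import tkinter as tk
from pygments import lex
from pygments.lexers import PythonLexer

root = tk.Tk()
text = tk.Text(root)
text.pack()

# Color a couple of token types; tags that are never configured are simply ignored.
text.tag_configure('Token.Keyword', foreground='#ff7700')
text.tag_configure('Token.Name.Function', foreground='#0000aa')

code = 'def greet(name):\n    return "hello " + name\n'
text.insert('1.0', code)

index = '1.0'
for token, content in lex(code, PythonLexer()):
    # Advance a character-offset index and tag the range with the token name.
    end = text.index('%s + %dc' % (index, len(content)))
    text.tag_add(str(token), index, end)
    index = end

root.mainloop()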
def get_tokenization(lexedWoComments, lexer):
    tokenized_string = ''
    token_types = []
    curr_line_empty = True
    for t in lexedWoComments:
        token_type = str(t[0])
        token = t[1]
        token_stripped = token.strip()

        # Pygments will sometimes lex many tokens as one.
        # This can occur with preprocessor directives and definitions in C.
        # In this case, we need to lex that whole line.
        num_tokens = len(token.split())
        if num_tokens > 1:
            # Need to manually lex each space-separated token on occasions
            # when pygments doesn't lex properly.
            line_split = token.split()
            line_lexed = []
            for temp_token in line_split:
                token_lexed = list(lex(temp_token, lexer))
                for lexed in token_lexed:
                    if lexed[1] != "\n":
                        line_lexed.append(lexed)
            line_lexed.append((Token.Text, '\n'))
            line_code, line_types = get_tokenization(line_lexed, lexer)
            tokenized_string += line_code
            token_types += line_types
            curr_line_empty = True
            continue

        if '\n' in token:
            if curr_line_empty:
                if (t[0] != Token.Text or t[0] != Token.Comment.Preproc) and token_stripped != '':
                    tokenized_string += token_stripped + "\n"
                    token_types.append(token_type)
            else:
                tokenized_string += token_stripped + "\n"
                # Edge case for a stray "\" in code
                if token_stripped == "\\":
                    token_types.append(token_type)
            curr_line_empty = True
        elif t[0] != Token.Text and len(token_stripped) > 0:
            curr_line_empty = False
            tokenized_string += token + ' '
            token_types.append(token_type)
    assert len(tokenized_string.split()) == len(token_types), "{0} != {1}".format(len(tokenized_string.split()), len(token_types))
    return tokenized_string, token_types
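A hedged usage sketch for get_tokenization() above, assuming it is defined in a module that already imports lex and Token (as the excerpt implies); the C snippet is illustrative.

from pygments import lex
from pygments.lexers import CLexer

c_code = 'int main() {\n    return 0;\n}\n'
# get_tokenization() expects an iterable of (token_type, value) pairs plus the
# lexer used to produce them (needed for its internal re-lexing of multi-word tokens).
tok_string, tok_types = get_tokenization(list(lex(c_code, CLexer())), CLexer())
print(tok_string)
print(len(tok_types), 'token type labels')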