simplePyLex.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:token-rnn-tensorflow 作者: aalmendoza 项目源码 文件源码
def tokenize_code(code, lexer, language, literal_option):
    """Lex a source string with Pygments and return its normalized tokenization.

    The pipeline strips comments and docstrings (preprocessor directives are
    retained), normalizes token types across languages, and then rewrites
    string/number literals according to *literal_option*:
      0 -> replace spaces inside strings with underscores
      1 -> replace each string with a single <str> token
      2 -> pad string contents with surrounding spaces
      3 -> collapse strings to <str> and numbers to a type token
    Any other value leaves literals untouched.
    """
    token_stream = list(lex(code, lexer))

    retained = [Token.Comment.Preproc, Token.Comment.PreprocFile]

    # Remove comments (keeping preprocessor tokens) and documentation strings.
    filtered = tokensExceptTokenType(token_stream, Token.Comment, retainedTypes=retained)
    filtered = tokensExceptTokenType(filtered, Token.Literal.String.Doc)
    # Alter the pygments lexer types to be more comparable between our languages.
    filtered = fixTypes(filtered, language)
    filtered = convertNamespaceTokens(filtered, language)
    filtered = fix_preprocessor_defs(filtered, lexer)
    # Second comment pass after fix_preprocessor_defs — presumably it can
    # surface new comment tokens; kept to preserve the original pipeline order.
    filtered = tokensExceptTokenType(filtered, Token.Comment, retainedTypes=retained)

    # Single-pass string rewrites share one shape; option 3 is a multi-step case.
    string_rewriters = {
        0: underscoreString,
        1: singleStringToken,
        2: spaceString,
    }
    if literal_option in string_rewriters:
        filtered = modifyStrings(filtered, string_rewriters[literal_option])
    elif literal_option == 3:
        filtered = modifyStrings(filtered, singleStringToken)
        filtered = collapseStrings(filtered)
        filtered = modifyNumbers(filtered, singleNumberToken)

    return get_tokenization(filtered, lexer)

# source_file: path of source file to be tokenized
# language: programming language of source file, e.g. "c"
# literal_option:
#   0 -> replace all spaces in strings with _
#   1 -> replace all strings with a <str> tag
#   2 -> add spaces to the ends of the strings
#   3 -> collapse strings to a <str> tag and collapse numbers to a type tag as well
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号