def clean_token(token):
    """Normalize quote, ellipsis and dash artifacts in *token*.

    HTML quote entities and a handful of stray high code points (the
    positions Windows-1252 uses for curly quotes, ellipsis and en dash)
    are replaced with plain ASCII equivalents.

    Returns the cleaned token if ``is_printable`` accepts it. Otherwise
    the printable part of the token is reported on stderr and the
    placeholder ``"<UNK>"`` is returned.

    ``is_printable`` is a helper defined outside this block; its exact
    criteria are not visible here.
    """
    # NOTE(review): the two entity literals below are reconstructed. In the
    # copy of this code under review they had been HTML-decoded into a no-op
    # ('"' -> '"') and a syntax error ('''), so "&quot;" / "&apos;" is the
    # presumed original spelling -- confirm against the upstream source.
    replacements = (
        ("&quot;", '"'),
        ("&apos;", "'"),
        ("\x85", "..."),  # ellipsis
        ("\x91", "'"),    # left single quote
        ("\x92", "'"),    # right single quote
        ("\x93", '"'),    # left double quote
        ("\x94", '"'),    # right double quote
        ("\x96", "-"),    # en dash
    )
    for old, new in replacements:
        token = token.replace(old, new)

    if is_printable(token):
        return token

    # Only echo the printable characters so the log line itself stays clean.
    printable_part = "".join(c for c in token if c in string.printable)
    sys.stderr.write("TOKEN NOT PRINTABLE: " + printable_part + "\n")
    return "<UNK>"
评论列表
文章目录