def stem_split(tokens):
    """Split each token into its stem and the ending that stemming removed.

    Every token is stemmed with ``PorterStemmer``.  When the stem is a
    proper prefix of the token, the token is replaced by two entries: the
    stem followed by the stripped ending.  A token that already equals its
    stem, or whose stem is not a literal prefix of the token (e.g. because
    the stemmer changed characters rather than only trimming a suffix), is
    passed through unchanged.

    Args:
        tokens: Iterable of token strings.

    Returns:
        A new list of tokens, possibly longer than the input.
    """
    stemmer = PorterStemmer()
    token_list = []
    for token in tokens:
        stem = stemmer.stem(token)
        # Use a prefix test and a slice rather than token.split(stem):
        # split() raises ValueError on an empty stem and loses text when the
        # stem occurs more than once or not at the start of the token.
        if stem and token != stem and token.startswith(stem):
            token_list.append(stem)
            token_list.append(token[len(stem):])
        else:
            token_list.append(token)
    return token_list
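# NOTE(review): the two lines below look like web-page navigation residue, not code.
# Comment list
# Article table of contents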