def words2ngrams(sep, num, tokens):
    """Build n-grams from word tokens and write them out as CSV.

    An n-gram here is a run of ``num`` consecutive tokens. Punctuation is
    said to count as a token of its own; that depends on what
    ``read_tokens`` returns, which is not visible from this function.

    Args:
        sep: Converted with ``str`` and passed as the second argument to
            ``write_csv`` (presumably the field separator -- confirm
            against ``write_csv``).
        num: n-gram length handed to ``nltk.ngrams``.
        tokens: Token source handed to ``read_tokens``.
    """
    words = read_tokens(tokens)
    # Materialize the n-grams into a list before handing them to the writer.
    grams = list(nltk.ngrams(words, num))
    delimiter = str(sep)
    write_csv(grams, delimiter)
评论列表
文章目录