def generate_collocations(tokens, *, window_size=2, min_freq=1, top_n=5):
    """Return the top-scoring bigram collocations found in *tokens*.

    Candidate bigrams are dropped when either word is shorter than three
    characters or is (case-insensitively) an English stopword, and when
    the bigram occurs fewer than ``min_freq`` times. The remaining
    candidates are ranked by likelihood ratio.

    Args:
        tokens: Sequence of word strings to search.
        window_size: Span, in tokens, within which two words count as a
            pair; 2 means adjacent words only.
        min_freq: Minimum number of occurrences a bigram needs to be kept.
        top_n: Maximum number of collocations to return.

    Returns:
        The result of ``finder.nbest``: up to ``top_n`` bigrams, best
        first.

    Note:
        The original author reported the best results with
        (window_size, min_freq) of (2, 1), (2, 2) and (5, 1).
    """
    # Build the stopword set once: the word filter below runs for every
    # candidate word, and set membership avoids rescanning a list each time.
    ignored_words = frozenset(nltk.corpus.stopwords.words('english'))
    bigram_measures = nltk.collocations.BigramAssocMeasures()
    finder = nltk.collocations.BigramCollocationFinder.from_words(
        tokens, window_size=window_size
    )
    finder.apply_word_filter(
        lambda w: len(w) < 3 or w.lower() in ignored_words
    )
    finder.apply_freq_filter(min_freq)
    return finder.nbest(bigram_measures.likelihood_ratio, top_n)
word_processing.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录