def get_top_ngrams(corpus, ngram_val=1, limit=5):
    """Return the most frequent n-grams in ``corpus`` with their counts.

    The corpus is passed through ``flatten_corpus``, tokenized with
    ``nltk.word_tokenize``, turned into n-grams by ``compute_ngrams`` and
    counted with ``nltk.FreqDist``.

    Args:
        corpus: Input handed to ``flatten_corpus`` (defined elsewhere).
            NOTE(review): presumably a collection of documents that gets
            flattened into one string -- confirm against the helper.
        ngram_val: N-gram size forwarded to ``compute_ngrams``.
        limit: Upper bound of the slice taken from the ranked list.

    Returns:
        A list of ``(phrase, frequency)`` tuples ordered by descending
        frequency, where ``phrase`` is the n-gram's items joined by a
        single space.
    """
    words = nltk.word_tokenize(flatten_corpus(corpus))
    freq_dist = nltk.FreqDist(compute_ngrams(words, ngram_val))

    # Rank by count, highest first. The sort is stable, so n-grams with
    # equal counts keep the order in which the distribution yields them.
    ranked = list(freq_dist.items())
    ranked.sort(key=itemgetter(1), reverse=True)

    # Each n-gram is a sequence of tokens; present it as one phrase.
    return [(' '.join(gram), count) for gram, count in ranked[0:limit]]
keyphrase_extraction.py 文件源码
python
阅读 41
收藏 0
点赞 0
评论 0
评论列表
文章目录