def get_tfidf_weighted_keyphrases(sentences,
                                  grammar=r'NP: {<DT>? <JJ>* <NN.*>+}',
                                  top_n=10):
    """Return the highest TF-IDF-weighted chunks extracted from *sentences*.

    The sentences are chunked with ``get_chunks`` using *grammar*; every
    element of that result is treated as one document when building the
    gensim dictionary, bag-of-words corpus and TF-IDF model.

    Args:
        sentences: Input passed straight through to ``get_chunks``.
        grammar: Chunk grammar forwarded to ``get_chunks``; the default
            matches an optional determiner, any adjectives, then one or
            more nouns.
        top_n: Maximum number of ``(phrase, weight)`` pairs to return.

    Returns:
        A list of ``(phrase, weight)`` tuples sorted by weight, highest
        first, truncated to ``top_n`` entries. Weights are rounded to three
        decimal places.
    """
    # NOTE(review): presumably one list of chunk strings per sentence --
    # confirm against get_chunks, which is defined outside this block.
    valid_chunks = get_chunks(sentences, grammar=grammar)

    dictionary = corpora.Dictionary(valid_chunks)
    corpus = [dictionary.doc2bow(chunk) for chunk in valid_chunks]

    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]

    # Keyed by phrase, so a phrase occurring in several documents keeps only
    # the weight from the last document in which it appears.
    weighted_phrases = {
        dictionary.get(token_id): round(weight, 3)
        for doc in corpus_tfidf
        for token_id, weight in doc
    }

    # sorted() is stable, so equally weighted phrases keep first-seen order.
    ranked_phrases = sorted(weighted_phrases.items(),
                            key=itemgetter(1), reverse=True)

    return ranked_phrases[:top_n]
keyphrase_extraction.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录