def load_words(num_words):
    """Return the most frequent words from ``get_words_from_nltk()``.

    Words are ranked by descending occurrence count; words that share the
    same count are ordered ascending by their natural sort order, so the
    result is deterministic.

    Args:
        num_words: Limit applied as a slice (``ranked[:num_words]``) to the
            ranked word list, so ``None`` keeps every word and a negative
            value drops words from the end, as list slicing does.

    Returns:
        A list of words, most frequent first.
    """
    fdist = nltk.FreqDist(get_words_from_nltk())
    # One sort on (-count, word) gives the same order as grouping the words
    # by count and sorting each group, without rescanning the full
    # distribution once per distinct count.
    ranked = sorted(fdist.items(), key=lambda item: (-item[1], item[0]))
    return [word for word, _count in ranked[:num_words]]
# Comment list
# Table of contents