def build_dictionary(sentences, size):
    """
    Create a dictionary containing the most frequent words in the sentences.

    :param sentences: iterable of sentences, each an iterable of hashable tokens.
        Caution: a one-shot iterator/generator is exhausted after calling
        this function!
    :param size: maximum number of words to keep in the dictionary
    :return: dictionary that maps word to index (starting from 1), where
        index 1 is the most frequent word. It holds fewer than ``size``
        entries when there are fewer distinct words, and is empty when no
        tokens were seen or ``size`` is not positive.
    """
    counts = defaultdict(int)
    for sentence in sentences:
        for token in sentence:
            counts[token] += 1
    frequent_pairs = nlargest(size, counts.items(), key=itemgetter(1))
    # Enumerate the (word, count) pairs directly rather than unpacking
    # ``zip(*frequent_pairs)``: the zip form raises ValueError when no
    # pairs were selected (empty input or non-positive size).
    return {
        word: index
        for index, (word, _count) in enumerate(frequent_pairs, start=1)
    }
# Comment list
# Table of contents