def build_dictionary(sentences, vocabulary_size):
# Turn sentences (list of strings) into lists of words
split_sentences = [s.split() for s in sentences]
words = [x for sublist in split_sentences for x in sublist]
# Initialize list of [word, word_count] for each word, starting with unknown
count = [['RARE', -1]]
# Now add most frequent words, limited to the N-most frequent (N=vocabulary size)
count.extend(collections.Counter(words).most_common(vocabulary_size-1))
# Now create the dictionary
word_dict = {}
# For each word, that we want in the dictionary, add it, then make it
# the value of the prior dictionary length
for word, word_count in count:
word_dict[word] = len(word_dict)
return(word_dict)
# Turn text data into lists of integers from dictionary
word2vec_skipgram.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录