def load_w2v(corpus, dictionary):
    """
    Return the trained Word2Vec model, training and saving it first if needed.

    When no file exists at ``W2V_MODEL_PATH``, a model is trained on the
    sentences returned by ``get_review_sentences()`` and saved to that path.
    In every case the model is then loaded from ``W2V_MODEL_PATH`` and
    returned, so callers always receive the on-disk version.

    :param corpus: not used by this function; kept so existing callers
        keep working
    :param dictionary: not used by this function; kept so existing callers
        keep working
    :return: the Word2Vec model loaded from ``W2V_MODEL_PATH``
    """
    if not os.path.isfile(W2V_MODEL_PATH):
        num_features = 300    # Word vector dimensionality
        min_word_count = 5    # Minimum word count
        num_workers = 5       # Number of threads to run in parallel
        window = 5            # Context window size
        downsampling = 1e-5   # Downsample setting for frequent words

        print("Training the word2vec model!")
        sents = get_review_sentences()

        # Initialize and train the model (this will take some time).
        # NOTE(review): `size=` and `init_sims()` look like the pre-4.0
        # gensim API (assuming `models` is `gensim.models`) -- confirm the
        # pinned gensim version before upgrading.
        model = models.Word2Vec(
            sents,
            workers=num_workers,
            size=num_features,
            min_count=min_word_count,
            window=window,
            sample=downsampling,
        )

        # The model is not trained any further after this point, so
        # init_sims(replace=True) is called to make it more memory-efficient.
        model.init_sims(replace=True)

        # Persist the model so later calls can skip training and just load it.
        model.save(W2V_MODEL_PATH)
        print('Word2vec model created!')

    print('Loading word2vec model')
    w2v = models.Word2Vec.load(W2V_MODEL_PATH)
    print('Loading word2vec model complished!')
    return w2v
评论列表
文章目录