def create_word2vec_model(embedding_size, input_file=TEXT_DIR):
    """
    Train a word2vec model on a corpus file and save it, unless it was saved before.

    The model file is named ``word2vec_<embedding_size>.model`` (a relative path).
    When a file with that name already exists, nothing is trained and an info
    message is logged instead.

    :param embedding_size: Size of the word vectors; also embedded in the model file name
    :param input_file: The corpus file, wrapped in ``word2vec.LineSentence`` for training
    """
    model_path = 'word2vec_%s.model' % (embedding_size,)

    # Guard clause: an existing model file is never retrained or overwritten.
    if os.path.isfile(model_path):
        logging.info('? The word2vec model you want create already exists!')
        return

    corpus = word2vec.LineSentence(input_file)
    worker_count = multiprocessing.cpu_count()

    # sg=0 selects the CBOW architecture (the default); sg=1 would select skip-gram.
    # NOTE(review): gensim 4 renamed `size` to `vector_size` - confirm the installed version.
    trained = gensim.models.Word2Vec(
        corpus,
        size=embedding_size,
        min_count=0,
        sg=0,
        workers=worker_count,
    )
    trained.save(model_path)
data_helpers.py 文件源码
python
阅读 35
收藏 0
点赞 0
评论 0
评论列表
文章目录