def trainWord2Vector(sentence_count, vector_dimension, train_count,
                     corpus_path="sources/splited_words.txt",
                     model_out="result/word2vec.model",
                     vector_out="result/pre_word2vec.vector"):
    """Train a Word2Vec model on a text corpus and save it to disk.

    The corpus file is wrapped in ``LineSentence`` and handed to the
    ``Word2Vec`` constructor; ``model.train`` is then called
    ``train_count`` more times over the same corpus. Finally the full
    model and its word vectors are written out.

    Args:
        sentence_count: Number of sentences in the corpus; forwarded to
            ``model.train`` as ``total_examples``.
        vector_dimension: Dimensionality of the word vectors (passed as
            ``size``).
        train_count: Number of additional ``model.train`` passes to run
            after construction. Zero or a negative value runs none.
        corpus_path: Path of the corpus file read through
            ``LineSentence``. Defaults to the previously hard-coded
            location.
        model_out: Destination path for ``model.save``.
        vector_out: Destination path for
            ``model.wv.save_word2vec_format``.

    Returns:
        None. The results are the two files written to ``model_out`` and
        ``vector_out``.
    """
    # The log text is kept exactly as found; it appears to have lost its
    # original (non-ASCII) encoding somewhere upstream.
    logging.info("??????")
    sentences = LineSentence(corpus_path)

    # sg=1 selects the skip-gram architecture (per the gensim docs).
    # min_count=0 means no word is dropped for being too rare.
    # workers is set to the number of CPUs reported by the machine.
    # NOTE(review): `size=` and `model.iter` are the gensim < 4.0 spellings
    # (4.0 renamed them to `vector_size` / `epochs`); left unchanged here --
    # confirm the pinned gensim version before upgrading.
    model = Word2Vec(sentences, sg=1, size=vector_dimension, window=8,
                     min_count=0, workers=multiprocessing.cpu_count())

    # Run extra training passes over the same corpus.
    for _ in range(train_count):
        model.train(sentences=sentences, total_examples=sentence_count,
                    epochs=model.iter)

    # Optional memory trim, left disabled as in the original:
    # model.init_sims(replace=True)
    model.save(model_out)
    model.wv.save_word2vec_format(vector_out)
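# Comment list  (web-page navigation residue, not code)
# Article table of contents  (web-page navigation residue, not code)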