import multiprocessing

from gensim.models.doc2vec import Doc2Vec, TaggedLineDocument


def trainDoc2Vector(sentence_count, vector_dimension):
    # train and save the model
    # (note: with gensim >= 4.0 the parameters are vector_size= and epochs= instead of size= and model.iter)
    sentences = TaggedLineDocument('sources/splited_words.txt')
    model = Doc2Vec(sentences, size=vector_dimension, window=8, min_count=2,
                    workers=multiprocessing.cpu_count())
    # passing the corpus to the constructor already trains the model; this call runs additional epochs
    model.train(sentences, total_examples=sentence_count, epochs=model.iter)
    model.save('result/doc2vec.model')

    # save one document vector per line, space-separated
    with open('result/doc2vec.vector', mode='w', encoding='utf-8') as out:
        for index in range(sentence_count):
            docvec = model.docvecs[index]
            out.write(' '.join(str(f) for f in docvec) + "\n")
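
A minimal usage sketch follows, assuming sources/splited_words.txt holds one pre-tokenized (space-separated) document per line; the result/ directory creation, the document count, and the 200-dimensional vector size are illustrative assumptions, not values from the original post.

import os

if __name__ == '__main__':
    # make sure the output directory exists before the model and vectors are written
    os.makedirs('result', exist_ok=True)
    # sentence_count should equal the number of lines (documents) in the corpus file
    with open('sources/splited_words.txt', encoding='utf-8') as f:
        num_docs = sum(1 for _ in f)
    trainDoc2Vector(sentence_count=num_docs, vector_dimension=200)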