def train_artistsong2vec_model(fout_path, input_datas=None, data_path=None,
                               min_count=5, sorted_vocab=1, window=10,
                               size=250,
                               iter_n=50):
    """Train a gensim Word2Vec model on flattened pair sequences and pickle it.

    Each sequence in the corpus is a series of items; the first two elements
    of every item are appended, in order, to one flat token list per
    sequence, and those token lists are the sentences fed to Word2Vec.
    (Judging by the function name the items are presumably (artist, song)
    pairs -- confirm against the caller.)

    Args:
        fout_path: Path the trained model is pickled to (opened in 'wb' mode).
        input_datas: Iterable of sequences of indexable items. When it is
            falsy (None or empty) and ``data_path`` is given, the corpus is
            unpickled from ``data_path`` instead.
        data_path: Optional path to a pickled corpus with the same structure
            as ``input_datas``.
        min_count: Forwarded to ``gensim.models.Word2Vec`` unchanged.
        sorted_vocab: Forwarded to ``gensim.models.Word2Vec`` unchanged.
        window: Forwarded to ``gensim.models.Word2Vec`` unchanged.
        size: Forwarded to ``gensim.models.Word2Vec`` as ``size``.
        iter_n: Forwarded to ``gensim.models.Word2Vec`` as ``iter``.

    Raises:
        TypeError: If no corpus is available, i.e. ``input_datas`` is None
            and nothing was loaded from ``data_path``.
    """
    if not input_datas and data_path:
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only point data_path at files produced by a trusted source.
        # The with-block guarantees the handle is closed even if loading fails.
        with open(data_path, 'rb') as fin:
            input_datas = pickle.load(fin)

    if input_datas is None:
        # Fail early with a clear message instead of the opaque
        # "'NoneType' object is not iterable" raised by the loop below.
        raise TypeError(
            'no training data: pass input_datas or a data_path '
            'pointing to a pickled corpus')

    # Flatten every sequence of items into one token list, keeping only the
    # first two elements of each item.
    full_data = [
        [token for item in sequence for token in (item[0], item[1])]
        for sequence in input_datas
    ]

    data_process_logger.info('start training')
    # NOTE(review): `size` and `iter` are the keyword names used by gensim
    # releases before 4.0 -- confirm the installed version before upgrading.
    wv_model = gensim.models.Word2Vec(full_data, min_count=min_count,
                                      sorted_vocab=sorted_vocab, window=window,
                                      size=size, iter=iter_n)
    with open(fout_path, 'wb') as fout:
        data_process_logger.info('start saving model')
        pickle.dump(wv_model, fout)
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print('model saved')
评论列表
文章目录