def create_boc_w2v_load(models,doc_path,win,freq,num_concept,model_path):
    '''
    Build and save bag-of-concepts (BOC) matrices from existing word2vec files.

    For every embedding file in ``models`` and every concept count in
    ``num_concept`` the function loads the embedding, derives a
    word-to-concept mapping, computes the BOC matrix for the documents and
    writes that matrix to a CSV file in the current working directory.

    Parameters:
        models: iterable of paths to embedding files readable by
            ``KeyedVectors.load_word2vec_format``.
        doc_path: path of the document collection; forwarded to
            ``get_tokens`` and ``apply_cfidf``.
        win: window size; only used to label the output file names.
        freq: minimum frequency threshold forwarded to ``get_tokens``; also
            used to label the output file names.
        num_concept: iterable of concept counts; one BOC matrix is produced
            per (model, concept count) pair.
        model_path: unused here; kept so the signature stays compatible with
            existing callers.

    Returns:
        A list with one ``parameters`` namedtuple per generated BOC matrix,
        with fields ``document_path``, ``dimension``, ``window_size``,
        ``min_freq`` and ``num_concept``.
    '''
    # Define the record type once instead of creating a new class per result.
    parameters=namedtuple('parameters','document_path dimension window_size min_freq num_concept')
    all_param=[]
    for em in models:
        em_name=em.split("/")[-1]
        model=KeyedVectors.load_word2vec_format(em)
        # Tokens are recomputed per model because get_wordvectors receives the
        # list and may alter it -- NOTE(review): confirm before hoisting.
        wlist=get_tokens(doc_path,freq)
        wM=get_wordvectors(model,wlist)
        for ecp in num_concept:
            w2c_output="w2c_d%s_w%s_mf%s_c%s.csv" %(str(em_name),str(win),str(freq),str(ecp))
            boc_output="boc_d%s_w%s_mf%s_c%s.csv" %(str(em_name),str(win),str(freq),str(ecp))
            # Use the current concept count, matching the count encoded in
            # the output file names, rather than the whole collection.
            word2concept=create_concepts(wM,wlist,w2c_output,ecp)
            boc=apply_cfidf(doc_path,word2concept,ecp)
            np.savetxt(boc_output, boc, delimiter=",")
            print(".... BOC vectors created in %s" %boc_output)
            # The embedding file name fills the "dimension" field, mirroring
            # its position in the "d%s" slot of the output file names.
            all_param.append(parameters(doc_path,em_name,win,freq,ecp))
    return all_param
评论列表
文章目录