def generate_fishervector(sample_set,
                          ica_model='./model/ICA/ica_ourword2vec.model',
                          gmm_model_path='./model/GMM/gmm_ourword2vec.model',
                          max_num=30000):
    """Encode every text sample as a Fisher vector of its word embeddings.

    Each sample is stripped and split on single spaces. Every word is
    looked up in the module-level ``word2vec_model``; the embeddings that
    are found are projected with the loaded ICA model, cast to float32 and
    handed to ``yael.ynumpy.fisher`` with ``include='mu sigma'``.

    Args:
        sample_set: Sized sequence of strings whose words are separated by
            single spaces.
        ica_model: Path given to ``joblib.load``; the loaded object must
            provide ``transform``.
        gmm_model_path: Path to a pickled GMM. The loaded object is indexed
            with ``[0]`` and ``[1]`` to read the number of centers and the
            dimensionality (presumably the ``(weights, means, sigmas)``
            tuple used by yael -- TODO confirm against the training code).
        max_num: Maximum number of word vectors encoded per sample. Only
            the first ``max_num + 200`` words of a sample are looked up.

    Returns:
        ``numpy`` array of shape ``(len(sample_set), centers * dims * 2)``.
        Rows of samples for which no word vector is available keep the
        initial fill value ``0.00001``.
    """
    # NOTE(review): joblib/pickle deserialisation can execute arbitrary
    # code; both model files must come from a trusted location.
    ica = joblib.load(ica_model)
    # Pickle data is binary: open in 'rb' and close the handle deterministically.
    with open(gmm_model_path, 'rb') as gmm_file:
        gmm_model = pkl.load(gmm_file)

    centers = gmm_model[0].shape[0]
    dims = gmm_model[1].shape[1]
    # Small non-zero fill so samples without any usable word are not all-zero.
    fishervector = np.zeros([len(sample_set), centers * dims * 2]) + 0.00001

    for i, sample in enumerate(sample_set):
        # Look at slightly more words than max_num so that words missing
        # from the vocabulary can be compensated for; slicing already
        # clamps to the sequence length.
        words = sample.strip().split(' ')[:max_num + 200]

        vectors = []
        for word in words:
            try:
                vectors.append(word2vec_model[word])
            except KeyError:
                # Word has no embedding: skip it.
                continue

        # Truncate before the emptiness check so that an empty selection
        # (e.g. max_num == 0) is skipped instead of reaching transform().
        vectors = vectors[:max_num]
        if vectors:
            projected = ica.transform(np.array(vectors)).astype(np.float32)
            fishervector[i] = yael.ynumpy.fisher(
                gmm_model, projected, include='mu sigma')

    # Single-argument form prints the same text under Python 2 and Python 3.
    print('mean vectors is %s' % (fishervector.mean(0),))
    return fishervector
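# Comment list        (navigation text left over from the source web page)
# Table of contents   (navigation text left over from the source web page)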