preprocessing.py 文件源码

python
阅读 57 收藏 0 点赞 0 评论 0

项目:image-text-matching 作者: llltttppp 项目源码 文件源码
def generate_fishervector(sample_set,ica_model='./model/ICA/ica_ourword2vec.model',gmm_model_path='./model/GMM/gmm_ourword2vec.model',max_num = 30000):
    """Encode each text sample as a Fisher vector over its word embeddings.

    Every sample is split on single spaces, each token is looked up in the
    module-level ``word2vec_model``, the resulting vectors are projected
    with the loaded ICA model and passed to ``yael.ynumpy.fisher`` together
    with the loaded GMM (``include='mu sigma'``).

    Args:
        sample_set: Sized iterable of strings, one per sample.
        ica_model: Path of the ICA model, loaded with ``joblib.load``.
        gmm_model_path: Path of the pickled GMM. Only its first two
            elements are inspected here (presumably the weights and the
            means of a yael GMM tuple -- confirm against the code that
            wrote the file).
        max_num: Maximum number of word vectors used per sample. At most
            ``max_num + 200`` tokens are looked up per sample.

    Returns:
        A 2-D numpy array with one row per sample and
        ``centers * dims * 2`` columns. Rows of samples for which no token
        was found in ``word2vec_model`` keep the initial fill value 1e-5.
    """
    ica = joblib.load(ica_model)
    # NOTE(review): unpickling executes arbitrary code; only load GMM files
    # from a trusted source.
    # Binary mode plus a context manager: pickles are binary data and the
    # handle should not be left open.
    with open(gmm_model_path, 'rb') as gmm_file:
        gmm_model = pkl.load(gmm_file)
    n_centers = gmm_model[0].shape[0]
    dims = gmm_model[1].shape[1]
    fishervector = np.zeros([len(sample_set), n_centers * dims * 2]) + 0.00001
    for i, sample in enumerate(sample_set):
        # Over-fetch by 200 tokens so that a few out-of-vocabulary words do
        # not leave the sample with fewer than max_num vectors.
        words = sample.strip().split(' ')[:max_num + 200]
        vectors = []
        for word in words:
            try:
                vectors.append(word2vec_model[word])
            except KeyError:
                # Out-of-vocabulary token: skip it. Any other error (e.g. a
                # missing model) is a real bug and must not be hidden.
                continue
        if vectors:
            vectors = vectors[:max_num]
            projected = ica.transform(np.array(vectors)).astype(np.float32)
            fishervector[i] = yael.ynumpy.fisher(gmm_model, projected, include='mu sigma')
    # Single pre-formatted argument: prints the same text as the former
    # Python 2 print statement and is also valid under Python 3.
    print('mean vectors is %s' % (fishervector.mean(0),))
    return fishervector
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号