every2vec.py 文件源码-python代码片段

every2vec.py 文件源码

python

阅读 19 收藏 0 点赞 0 评论 0

def prep_embed(self, full_i2w_dict, ind2word, w2v_dim):
        """Build the embedding matrix and a per-index vector lookup.

        Every ``(index, word)`` pair of ``full_i2w_dict`` receives one
        vector of length ``w2v_dim``:

        * when the index is a key of ``ind2word``, the vector is
          ``self.model[word]`` converted to a NumPy array;
        * otherwise a fresh ``glorot_uniform((w2v_dim,))`` sample is
          evaluated and used (one draw per such entry).

        NOTE(review): membership is tested on the *index* while the lookup
        in ``self.model`` uses the *word*; this presumably relies on
        ``ind2word`` holding exactly the indices whose words exist in the
        model -- confirm against the caller.

        Args:
            full_i2w_dict: Mapping of index -> word. Indices are used as row
                positions of the returned matrix, so they are assumed to be
                valid row indices for ``len(full_i2w_dict)`` rows.
            ind2word: Container supporting ``in``; decides which indices are
                read from ``self.model``.
            w2v_dim: Length of each embedding vector.

        Returns:
            A tuple ``(embed_weight, embed_dict)``: a
            ``(len(full_i2w_dict), w2v_dim)`` array whose row ``i`` is the
            vector chosen for index ``i``, and a dict mapping each index to
            that same vector.
        """
        vocab_size = len(full_i2w_dict)
        embed_weight = np.zeros((vocab_size, w2v_dim))
        embed_dict = {}

        for idx, word in full_i2w_dict.items():
            # Choose the source of the vector first, then store it once.
            if idx not in ind2word:
                vector = np.array(glorot_uniform((w2v_dim,)).eval())
            else:
                vector = np.array(self.model[word])
            embed_weight[idx] = vector
            embed_dict[idx] = vector

        return embed_weight, embed_dict