log_probablity.py 文件源码-python代码片段

log_probablity.py 文件源码

python

阅读 22 收藏 0 点赞 0 评论 0

项目：word2vec_pipeline 作者: NIHOPA 项目源码文件源码

def create_partition_function(self, f_w2v, f_h5):
        """Compute per-word partition statistics and store them in HDF5.

        Loads the word2vec model via ``load_w2vec()``, and for every word in
        the model vocabulary evaluates ``self.energy`` between the full
        embedding matrix and that word's vector, reducing the result with
        ``compute_partition_stats``.

        The output file ``f_h5`` is (over)written with:

        * dataset ``"words"``  -- the vocabulary, UTF-8 encoded
        * attribute ``vocab_N`` -- the number of words
        * dataset ``"Z"``      -- one partition-stat entry per word, in the
          same order as ``"words"``

        Args:
            f_w2v: Not used by this method; the model is obtained from
                ``load_w2vec()``. NOTE(review): presumably the model path --
                confirm whether it should be forwarded to the loader.
            f_h5: Path of the HDF5 file to create.
        """
        print("Building the partition function")

        # Load the model from disk
        M = load_w2vec()
        words = M.wv.index2word

        # Compute the partition function for each word (with a progress bar)
        ZT = [
            compute_partition_stats(self.energy(M.wv.syn0, M[w]))
            for w in tqdm.tqdm(words)
        ]

        # Save the partition function to disk.
        # h5py needs an explicit variable-length text dtype. ``type(u"")`` is
        # ``unicode`` on Python 2 and ``str`` on Python 3, so this avoids the
        # NameError that a bare ``unicode`` reference raises on Python 3.
        text_type = type(u"")
        dt = h5py.special_dtype(vlen=text_type)

        with h5py.File(f_h5, 'w') as h5:
            encoded_words = [w.encode('utf8') for w in words]
            h5.create_dataset("words", (len(words),),
                              dtype=dt,
                              data=encoded_words)

            h5.attrs['vocab_N'] = len(words)
            h5['Z'] = ZT