utils.py 文件源码-python代码片段

utils.py 文件源码

python

阅读 22 收藏 0 点赞 0 评论 0

项目：TrickleDownML 作者: andykamath 项目源码文件源码

def preprocess(self, input_file, vocab_file, tensor_file):
        with open(input_file, "r") as f:
            data = f.read()

        # Optional text cleaning or make them lower case, etc.
        #data = self.clean_str(data)
        x_text = data.split()

        self.vocab, self.words = self.build_vocab(x_text)
        self.vocab_size = len(self.words)

        with open(vocab_file, 'wb') as f:
            cPickle.dump(self.words, f)

        #The same operation like this [self.vocab[word] for word in x_text]
        # index of words as our basic data
        self.tensor = np.array(list(map(self.vocab.get, x_text)))
        # Save the data to data.npy
        np.save(tensor_file, self.tensor)