Criteo_all.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:Ads-RecSys-Datasets 作者: Atomu2014 项目源码 文件源码
def __iter__(self, gen_type='train', batch_size=None, shuffle_block=False, random_sample=False, split_fields=False,
                 on_disk=True, squeeze_output=False, **kwargs):
        gen_type = gen_type.lower()

        if on_disk:
            print('on disk...')

            for hdf_X, hdf_y in self._files_iter_(gen_type=gen_type, shuffle_block=shuffle_block):
                # num_of_lines = pd.HDFStore(hdf_y, mode='r').get_storer('fixed').shape[0]

                X_all = pd.read_hdf(hdf_X, mode='r').as_matrix()
                y_all = pd.read_hdf(hdf_y, mode='r').as_matrix()

                gen = self.generator(X_all, y_all, batch_size, shuffle=random_sample)
                for X, y in gen:
                    if split_fields:
                        X = np.split(X, self.max_length, axis=1)
                        for i in range(self.max_length):
                            X[i] -= self.feat_min[i]
                    if squeeze_output:
                        y = y.squeeze()
                    yield X, y
        else:
            print('not implemented')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号