def _precompute(self, Xy_generator, cache):
    """Precompute (X, y) pairs from a generator into an HDF5 cache file.

    Parameters
    ----------
    Xy_generator : iterable of (X_, y_)
        Yields per-sequence features `X_` with shape
        (?, n_samples, n_features) -- only the last two dims are stored;
        the shape is taken from the first yielded item -- and a label
        `y_` stored as a variable-length bytes scalar.
    cache : str
        Path of the HDF5 file to (over)write; opened with mode='w'.

    Notes
    -----
    Datasets 'X' and 'y' start with capacity 1000 sequences and are
    grown by ~10% whenever full, then shrunk to the exact count at the
    end.
    """
    with h5py.File(cache, mode='w') as fp:
        # initial capacity; grown on demand below
        n_sequences = 1000
        y = fp.create_dataset(
            'y', shape=(n_sequences, ),
            dtype=h5py.special_dtype(vlen=bytes),
            maxshape=(None, ))

        # 'X' is created lazily: its sample/feature dims are only known
        # once the first item has been yielded.
        X = None
        # i stays -1 when the generator is empty, so the final resize
        # correctly shrinks 'y' to zero instead of raising NameError.
        i = -1
        for i, (X_, y_) in enumerate(Xy_generator):
            if i == 0:
                _, n_samples, n_features = X_.shape
                X = fp.create_dataset(
                    'X', dtype=X_.dtype, compression='gzip',
                    shape=(n_sequences, n_samples, n_features),
                    chunks=(1, n_samples, n_features),
                    maxshape=(None, n_samples, n_features))
            # grow capacity by ~10% when full
            if i == n_sequences:
                n_sequences = int(n_sequences * 1.1)
                y.resize(n_sequences, axis=0)
                X.resize(n_sequences, axis=0)
            # store current X, y in file
            y[i] = y_
            X[i] = X_
        # shrink to the exact number of sequences written: `i` is the
        # index of the LAST stored item, so i + 1 items exist (the
        # original resized to `i`, silently dropping the last sequence).
        y.resize(i + 1, axis=0)
        if X is not None:
            X.resize(i + 1, axis=0)
# NOTE(review): removed scraped web-page residue ("评论列表" / "文章目录",
# i.e. "comment list" / "table of contents") that was not Python code.