# preprocess_cross_val.py - Python source code snippet

def generate_dataset(self, split_dir, mode='training'):
        """Pre-process the images of one split and pack them into an HDF5 file.

        Steps, in order:

        1. Load ``<split_dir>/<mode>.csv`` into a DataFrame (first column is
           used as the index).
        2. Obtain the image output directory via ``make_sub_dir(split_dir, mode)``
           and one sub-directory per distinct value of the ``self.label`` column.
        3. If ``find_images`` reports nothing under ``<data_dir>/*``, run
           ``do_preprocess`` over every entry of the ``full_path`` column in a
           pool of ``self.processes`` worker processes. Augmentation is
           requested only when ``mode == 'training'``.
        4. Call ``self.generate_h5`` to write ``<split_dir>/<mode>.h5``.

        :param split_dir: directory holding the split CSV files; outputs are
            written beneath it.
        :param mode: either ``'training'`` or ``'testing'``.
        :raises ValueError: if ``mode`` is any other value.
        """
        if mode not in ('training', 'testing'):
            raise ValueError("Mode must be 'training' or 'testing'")

        do_augment = mode == 'training'  # we only want to augment the training data

        # DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
        # read_csv with index_col=0 / parse_dates=True reproduces its defaults.
        split_df = pd.read_csv(join(split_dir, '{}.csv'.format(mode)),
                               index_col=0, parse_dates=True)  # load splits
        data_dir = make_sub_dir(split_dir, mode)  # output directory for images

        # Make directories for each class of images in advance
        classes = [str(label) for label in split_df[self.label].unique()]
        for class_name in classes:
            make_sub_dir(data_dir, class_name)

        # Single-argument parenthesised form prints identically on Python 2 and 3.
        print("Preprocessing {} data...".format(mode))

        # Only pre-process when no images are found yet, so a re-run reuses
        # the existing output instead of regenerating it.
        if len(find_images(join(data_dir, '*'))) == 0:
            worker = partial(do_preprocess,
                             args={'params': self, 'augment': do_augment, 'out_dir': data_dir})
            img_list = list(split_df['full_path'])

            pool = Pool(self.processes)
            try:
                pool.map(worker, img_list)
            finally:
                # Always release the worker processes, even if a task raised.
                pool.close()
                pool.join()

        # NOTE(review): random_sample=True is passed for both modes, although the
        # original comment only mentioned sampling the training set - confirm intent.
        self.generate_h5(find_images_by_class(data_dir, classes=classes),
                         join(split_dir, '{}.h5'.format(mode)), split_df,
                         random_sample=True, classes=classes)