dataset.py 文件源码-python代码片段

dataset.py 文件源码

python

阅读 25 收藏 0 点赞 0 评论 0

项目：chazutsu 作者：chakki-works 项目源码 文件源码

def label_by_dir(self, file_path, target_dir, dir_and_label, task_size=10):
    """Build one labeled text file from files grouped in labeled directories.

    Every immediate sub-directory of ``target_dir`` whose name is a key of
    ``dir_and_label`` is scanned (non-recursively) for regular files. Each
    file is passed, together with the directory's label, to
    ``self._make_pair``; the returned values are written to ``file_path``
    in batches of ``task_size`` files.

    The first batch written truncates ``file_path``; all later batches are
    appended. If no labeled directory contains a file, ``file_path`` is
    neither created nor modified.

    Args:
        file_path: Path of the UTF-8 text file to write.
        target_dir: Directory whose sub-directories hold the source files.
        dir_and_label: Mapping of sub-directory name to label.
        task_size: Number of files handed to the parallel workers per batch.
            Must be at least 1.

    Raises:
        ValueError: If ``task_size`` is smaller than 1.
    """
    # Fail fast: 0 used to surface as a ZeroDivisionError deep in the loop
    # and a negative value silently produced no output at all.
    if task_size < 1:
        raise ValueError(
            "task_size must be a positive integer, got {!r}".format(task_size))

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # generated file reproducible between runs and platforms.
    dirs = sorted(
        d for d in os.listdir(target_dir)
        if d in dir_and_label
        and os.path.isdir(os.path.join(target_dir, d)))

    # The first write truncates any previous content, later ones append.
    mode = "w"
    for d in dirs:
        label = dir_and_label[d]
        self.logger.info(
            "Extracting {} (labeled by {}).".format(d, label))
        dir_path = os.path.join(target_dir, d)
        candidates = (os.path.join(dir_path, f) for f in os.listdir(dir_path))
        paths = sorted(p for p in candidates if os.path.isfile(p))

        # Stepping the range by task_size yields the start index of every
        # batch, including the final, possibly shorter, one.
        for start in xtqdm(range(0, len(paths), task_size)):
            tasks = paths[start:start + task_size]
            # NOTE(review): _make_pair is presumably returning one complete
            # text line (str) per file -- confirm against its definition.
            lines = Parallel(n_jobs=-1)(
                delayed(self._make_pair)(label, t) for t in tasks)
            # The file is reopened per batch so it is never held open while
            # the parallel workers are running.
            with open(file_path, mode=mode, encoding="utf-8") as f:
                f.writelines(lines)
            mode = "a"