def tpn_raw_data(data_path):
    """Return the usable training and validation entries under ``data_path``.

    Every path matching ``<data_path>/train/*`` and ``<data_path>/val/*`` is
    checked in sorted order.  A path is kept when it is a zip archive or a
    directory; anything else is reported on stdout and dropped.  Progress is
    printed every 1000 entries, and a final total is printed per set unless
    the last progress line already reported it.

    Args:
        data_path: Root directory that is searched for ``train`` and ``val``
            sub-directories.

    Returns:
        A ``(valid_train_set, valid_val_set)`` tuple of sorted path lists.
        A missing or empty sub-directory yields an empty list.
    """
    valid_sets = []
    for set_name in ('train', 'val'):
        candidates = sorted(glob.glob(osp.join(data_path, set_name + '/*')))
        kept = []
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Checking {} set files...".format(set_name))
        for checked, path in enumerate(candidates, start=1):
            if zipfile.is_zipfile(path) or osp.isdir(path):
                kept.append(path)
            else:
                print("{} is not a valid zip file or a directory".format(path))
            if checked % 1000 == 0:
                print("{} files checked.".format(checked))
        # Take the total from the list length rather than the loop variable:
        # the loop variable is unbound (or stale from the previous set) when
        # the set is empty.
        total = len(candidates)
        if total == 0 or total % 1000 != 0:
            print("Totally {} files checked.".format(total))
        valid_sets.append(kept)
    valid_train_set, valid_val_set = valid_sets
    return valid_train_set, valid_val_set
# Comment list
# Table of contents