def __init__(self, root='data/omniglot', transform=None, target_transform=None, download=True):
    self.root = root
    self.transform = transform
    self.target_transform = target_transform

    if download:
        self.download()

    assert self._check_exists(), 'Dataset not found. You can use download=True to download it'

    self.all_items = find_classes(os.path.join(self.root, self.processed_folder))
    self.classes = index_classes(self.all_items)
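The find_classes and index_classes helpers are not shown in this snippet. Below is a minimal sketch of what they typically do in Omniglot loaders, assuming the processed folder holds one directory per alphabet/character filled with .png images; the exact implementation in the project may differ.

import os

def find_classes(root_dir):
    # Walk the processed folder and collect (filename, "alphabet/character", directory) triples.
    items = []
    for root, _, files in os.walk(root_dir):
        for fname in files:
            if fname.endswith('.png'):
                parts = root.replace(os.sep, '/').split('/')
                items.append((fname, parts[-2] + '/' + parts[-1], root))
    return items

def index_classes(items):
    # Map each distinct character class name to a consecutive integer index.
    idx = {}
    for _, class_name, _ in items:
        if class_name not in idx:
            idx[class_name] = len(idx)
    return idx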
def __init__(self, image_dir):
    super(Dataset, self).__init__()
    # self.path = image_dir
    self.input_filenames = glob.glob(os.path.join(image_dir, "*.jpg"))
def get_custom_dataset(config):
    dataset = None
    if config.dataset_mode == 'aligned':
        dataset = AlignedDataset()
    elif config.dataset_mode == 'unaligned':
        dataset = UnalignedDataset()
    elif config.dataset_mode == 'single':
        dataset = SingleDataset()
    else:
        raise ValueError("Dataset [%s] not recognized." % config.dataset_mode)

    print("dataset [%s] was created" % dataset.name())
    dataset.initialize(config)
    return dataset
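A hypothetical usage sketch: config is assumed to be an options object (e.g. built with argparse) carrying at least a dataset_mode attribute, and AlignedDataset, UnalignedDataset, and SingleDataset come from the surrounding project.

from argparse import Namespace

config = Namespace(dataset_mode='aligned')   # real configs carry many more options
dataset = get_custom_dataset(config)         # prints the created dataset's name()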
def get_loaders(data_name, vocab, crop_size, batch_size, workers, opt):
    dpath = os.path.join(opt.data_path, data_name)
    if opt.data_name.endswith('_precomp'):
        train_loader = get_precomp_loader(dpath, 'train', vocab, opt,
                                          batch_size, True, workers)
        val_loader = get_precomp_loader(dpath, 'dev', vocab, opt,
                                        batch_size, False, workers)
    else:
        # Build Dataset Loader
        roots, ids = get_paths(dpath, data_name, opt.use_restval)

        transform = get_transform(data_name, 'train', opt)
        train_loader = get_loader_single(opt.data_name, 'train',
                                         roots['train']['img'],
                                         roots['train']['cap'],
                                         vocab, transform, ids=ids['train'],
                                         batch_size=batch_size, shuffle=True,
                                         num_workers=workers,
                                         collate_fn=collate_fn)

        transform = get_transform(data_name, 'val', opt)
        val_loader = get_loader_single(opt.data_name, 'val',
                                       roots['val']['img'],
                                       roots['val']['cap'],
                                       vocab, transform, ids=ids['val'],
                                       batch_size=batch_size, shuffle=False,
                                       num_workers=workers,
                                       collate_fn=collate_fn)

    return train_loader, val_loader
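For context, get_loader_single and collate_fn above are assumed to wrap the standard torch.utils.data.DataLoader pattern for batching variable-length captions. The following is a self-contained, illustrative sketch of that pattern with toy data, not the project's actual loader:

import torch
from torch.utils.data import Dataset, DataLoader

class ToyCaptionDataset(Dataset):
    """Yields (image_tensor, caption_tensor) pairs with variable caption length."""
    def __len__(self):
        return 8
    def __getitem__(self, idx):
        image = torch.randn(3, 32, 32)
        caption = torch.randint(0, 100, (idx % 5 + 3,))  # variable-length token ids
        return image, caption

def toy_collate_fn(batch):
    """Pad captions to a common length so the batch can be stacked."""
    images, captions = zip(*batch)
    images = torch.stack(images, 0)
    lengths = [len(c) for c in captions]
    padded = torch.zeros(len(captions), max(lengths), dtype=torch.long)
    for i, c in enumerate(captions):
        padded[i, :len(c)] = c
    return images, padded, lengths

loader = DataLoader(ToyCaptionDataset(), batch_size=4, shuffle=True,
                    num_workers=0, collate_fn=toy_collate_fn)
images, captions, lengths = next(iter(loader))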
triplet_mnist_loader.py (project: triplet-network-pytorch, author: andreasveit)
def __init__(self, root, n_train_triplets=50000, n_test_triplets=10000, train=True,
             transform=None, target_transform=None, download=False):
    self.root = root
    self.transform = transform
    self.train = train  # training set or test set

    if download:
        self.download()

    if not self._check_exists():
        raise RuntimeError('Dataset not found.' +
                           ' You can use download=True to download it')

    if self.train:
        self.train_data, self.train_labels = torch.load(
            os.path.join(root, self.processed_folder, self.training_file))
        self.make_triplet_list(n_train_triplets)
        triplets = []
        for line in open(os.path.join(root, self.processed_folder, self.train_triplet_file)):
            triplets.append((int(line.split()[0]), int(line.split()[1]), int(line.split()[2])))  # anchor, close, far
        self.triplets_train = triplets
    else:
        self.test_data, self.test_labels = torch.load(
            os.path.join(root, self.processed_folder, self.test_file))
        self.make_triplet_list(n_test_triplets)
        triplets = []
        for line in open(os.path.join(root, self.processed_folder, self.test_triplet_file)):
            triplets.append((int(line.split()[0]), int(line.split()[1]), int(line.split()[2])))  # anchor, close, far
        self.triplets_test = triplets
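Each line of the triplet files read above carries three whitespace-separated integer indices, interpreted as (anchor, close, far). An equivalent parsing helper, shown purely for illustration:

def load_triplets(path):
    # one triplet per line: "<anchor> <close> <far>"
    with open(path) as f:
        return [tuple(int(x) for x in line.split()[:3]) for line in f]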
def read_labels(self, filename):
    with open(filename, 'r') as f:
        # map() returns an iterator in Python 3, so materialize it before
        # passing it to torch.Tensor
        labels = list(map(float, f.readlines()))
    labels = torch.Tensor(labels)
    return labels
# Dataset class for SICK dataset
def __init__(self, root, transform=None, target_transform=None,
             train=True, test=False, top=100, group=True,
             additional_transform=None):
    self.root = root
    self.transform = transform
    self.additional_transform = additional_transform
    self.target_transform = target_transform
    self.top_objects = top
    self.top_folder = 'top_{0}'.format(top)
    self.group = group

    if not osp.exists(self.root):
        raise RuntimeError('Dataset not found, ' +
                           'please download it from: ' +
                           'http://visualgenome.org/api/v0/api_home.html')

    if not self.__check_exists():
        self.process_dataset()

    # self.region_objects, self.obj_idx = self.load_region_objects()
    if train:
        train_file = osp.join(self.data_path, self.top_folder,
                              self.region_train_file)
        with open(train_file, 'rb') as f:
            self.regions = torch.load(f)
    elif test:
        test_file = osp.join(self.data_path, self.top_folder,
                             self.region_test_file)
        with open(test_file, 'rb') as f:
            self.regions = torch.load(f)
    else:
        val_file = osp.join(self.data_path, self.top_folder,
                            self.region_val_file)
        with open(val_file, 'rb') as f:
            self.regions = torch.load(f)

    if self.group:
        self.regions = self.__group_regions_by_id(self.regions)

    corpus_file = osp.join(self.data_path, self.processed_folder,
                           self.corpus_file)
    with open(corpus_file, 'rb') as f:
        self.corpus = torch.load(f)

    region_obj_file = osp.join(self.data_path, self.top_folder,
                               self.region_objects_file)
    with open(region_obj_file, 'rb') as f:
        self.region_objects = torch.load(f)

    obj_idx_path = osp.join(self.data_path, self.top_folder,
                            self.obj_idx_file)
    with open(obj_idx_path, 'rb') as f:
        self.obj_idx = torch.load(f)

    self.idx_obj = {v: k for k, v in self.obj_idx.items()}
    # del region_objects
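A hypothetical instantiation, kept commented out because only the __init__ body is shown here; the class name and root path below are assumptions:

# train_set = VisualGenomeRegionsDataset(root='data/visual_genome', train=True, top=100)
# print(len(train_set.regions), 'regions,', len(train_set.obj_idx), 'object classes')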
def __init__(self, train_root, labels_file, type_='char'):
    '''
    Dataset('/mnt/7/zhihu/ieee_zhihu_cup/train.npz','/mnt/7/zhihu/ieee_zhihu_cup/a.json')
    '''
    import json
    with open(labels_file) as f:
        labels_ = json.load(f)

    # embedding_d = np.load(embedding_root)['vector']
    question_d = np.load(train_root)
    self.type_ = type_
    if type_ == 'char':
        all_data_title, all_data_content = \
            question_d['title_char'], question_d['content_char']
    elif type_ == 'word':
        all_data_title, all_data_content = \
            question_d['title_word'], question_d['content_word']

    # hold out the last 20000 questions as a validation split
    self.train_data = all_data_title[:-20000], all_data_content[:-20000]
    self.val_data = all_data_title[-20000:], all_data_content[-20000:]
    self.all_num = len(all_data_content)
    # del all_data_title, all_data_content

    self.data_title, self.data_content = self.train_data
    self.len_ = len(self.data_title)
    self.index2qid = question_d['index2qid'].item()
    self.l_end = 0
    self.labels = labels_['d']
# def augument(self, d):
#     '''
#     Data augmentation: random shift
#     '''
#     if self.type_ == 'char':
#         _index = (-8, 8)
#     else:
#         _index = (-5, 5)
#     r = d.new(d.size()).fill_(0)
#     index = random.randint(-3, 4)
#     if index > 0:
#         r[index:] = d[:-index]
#     else:
#         r[:-index] = d[index:]
#     return r

# def augument(self, d, type_=1):
#     if type_ == 1:
#         return self.shuffle(d)
#     else:
#         if self.type_ == 'char':
#             return self.dropout(d, p=0.6)
def eval_parser():
    usage = 'Samples SMASH architectures and tests them on CIFAR.'
    parser = ArgumentParser(description=usage)
    parser.add_argument(
        '--SMASH', type=str, default=None, metavar='FILE',
        help='The SMASH network .pth file to evaluate.')
    parser.add_argument(
        '--batch-size', type=int, default=100,
        help='Images per batch (default: %(default)s)')
    parser.add_argument(
        '--which-dataset', type=str, default='C100',
        help='Which dataset to train on (default: %(default)s)')
    parser.add_argument(
        '--seed', type=int, default=0,
        help='Random seed to use.')
    parser.add_argument(
        '--validate', action='store_true', default=True,
        help='Perform validation on the validation set (enabled by default)')
    parser.add_argument(
        '--validate-test', action='store_const', dest='validate',
        const='test', help='Evaluate on the test set after every epoch.')
    parser.add_argument(
        '--num-random', type=int, default=500,
        help='Number of random architectures to sample (default: %(default)s)')
    parser.add_argument(
        '--num-perturb', type=int, default=100,
        help='Number of random perturbations to sample (default: %(default)s)')
    parser.add_argument(
        '--num-markov', type=int, default=100,
        help='Number of Markov steps to take after perturbation (default: %(default)s)')
    parser.add_argument(
        '--perturb-prob', type=float, default=0.05,
        help='Chance of any individual element being perturbed (default: %(default)s)')
    parser.add_argument(
        '--arch-SGD', action='store_true', default=False,
        help='Perturb archs with architectural SGD (default: %(default)s)')
    parser.add_argument(
        '--fp16', action='store_true', default=False,
        help='Evaluate with half precision (default: %(default)s)')
    parser.add_argument(
        '--parallel', action='store_true', default=False,
        help='Evaluate with multiple GPUs (default: %(default)s)')
    return parser
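A brief usage sketch; the flag values below are made up for illustration:

parser = eval_parser()
args = parser.parse_args(['--SMASH', 'smash_net.pth', '--batch-size', '50',
                          '--which-dataset', 'C10'])
print(args.batch_size, args.which_dataset, args.validate)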
def create_dataset(
        problem_size,
        data_dir):

    def find_or_return_empty(data_dir, problem_size):
        # train_fname1 = os.path.join(data_dir, 'tsp{}.txt'.format(problem_size))
        val_fname1 = os.path.join(data_dir, 'tsp{}_test.txt'.format(problem_size))
        # train_fname2 = os.path.join(data_dir, 'tsp-{}.txt'.format(problem_size))
        val_fname2 = os.path.join(data_dir, 'tsp-{}_test.txt'.format(problem_size))

        if not os.path.isdir(data_dir):
            os.mkdir(data_dir)
        else:
            # if os.path.exists(train_fname1) and os.path.exists(val_fname1):
            #     return train_fname1, val_fname1
            # if os.path.exists(train_fname2) and os.path.exists(val_fname2):
            #     return train_fname2, val_fname2
            # return None, None
            if os.path.exists(val_fname1):
                return val_fname1
            if os.path.exists(val_fname2):
                return val_fname2
        return None

    # train, val = find_or_return_empty(data_dir, problem_size)
    # if train is None and val is None:
    #     download_google_drive_file(data_dir, 'tsp', '', problem_size)
    #     train, val = find_or_return_empty(data_dir, problem_size)
    # return train, val

    val = find_or_return_empty(data_dir, problem_size)
    if val is None:
        download_google_drive_file(data_dir, 'tsp', '', problem_size)
        val = find_or_return_empty(data_dir, problem_size)

    return val
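A hypothetical call, left commented out because it may trigger a download through download_google_drive_file; the directory name is an assumption:

# val_path = create_dataset(problem_size=20, data_dir='data')
# print(val_path)  # e.g. data/tsp20_test.txt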
#######################################
# Dataset
#######################################