def convert2tensor(self, dataset, batch_size, limit):
    """Turn a {'data', 'labels'} dict into a shuffled DataLoader.

    Only the first `limit` samples are used; images are normalized via
    `common.normalize` and cast to float tensors before batching.
    """
    images = dataset['data'][:limit]
    print("normalizing images...")
    images = common.normalize(images)
    print("done")
    labels = np.asarray(dataset['labels'][:limit])
    tensor_data = torch.from_numpy(images).float()
    tensor_target = torch.from_numpy(labels)
    paired = data_utils.TensorDataset(tensor_data, tensor_target)
    return data_utils.DataLoader(paired, batch_size=batch_size, shuffle=True)
Example source code for the Python DataLoader() class
def convert2tensor(self, dataset, batch_size, limit):
    """Turn an SVHN-style {'X', 'y'} dict into a shuffled DataLoader.

    dataset['X'] is expected in the (H, W, C, N) layout of the SVHN .mat
    files; samples are reordered to (N, H, W, C). Only the first `limit`
    samples are kept, and labels are shifted from 1..10 down to 0..9.
    """
    b_data = dataset['X']
    print("normalizing images...")
    b_data = common.normalize(b_data)
    print("done")
    target = dataset['y']
    target = target.reshape(len(target))[:limit]
    # SVHN labels run 1..10: shift to 0..9 to fit the network outputs.
    target = target - 1
    # BUG FIX: the original applied b_data[:limit], which slices image
    # rows (axis 0) instead of selecting samples (axis 3) and silently
    # cropped every image whenever limit < image height. Move the sample
    # axis to the front, then take the first len(target) samples — the
    # same samples the original per-index loop collected.
    data = np.ascontiguousarray(np.transpose(b_data, (3, 0, 1, 2))[:len(target)])
    tensor_data = torch.from_numpy(data).float()
    tensor_target = torch.from_numpy(target)
    loader = data_utils.TensorDataset(tensor_data, tensor_target)
    return data_utils.DataLoader(loader, batch_size=batch_size, shuffle=True)
def get_loader(df, transformations):
    """Wrap the Kaggle Amazon JPG dataset for `df` in a DataLoader.

    `batch_size` is resolved from the enclosing scope; no shuffling is
    requested (DataLoader default).
    """
    dataset = KaggleAmazonJPGDataset(df, paths.train_jpg, transformations, divide=False)
    return DataLoader(dataset,
                      batch_size=batch_size,
                      num_workers=12,
                      pin_memory=True)
def get_data_loader(dataset_name,
                    batch_size=1,
                    dataset_transforms=None,
                    is_training_set=True,
                    shuffle=True):
    """Build a DataLoader for a torchvision dataset chosen by name.

    `dataset_name` must be an attribute of `datasets` (e.g. 'MNIST');
    any extra transforms are appended after ToTensor. The dataset is
    downloaded into DATA_DIR on first use.
    """
    extra = dataset_transforms or []
    pipeline = transforms.Compose([transforms.ToTensor()] + extra)
    dataset_cls = getattr(datasets, dataset_name)
    dataset = dataset_cls(root=DATA_DIR,
                          train=is_training_set,
                          transform=pipeline,
                          download=True)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
def load_data(self):
    """Load the synthetic and real image folders into shuffled DataLoaders.

    Populates self.syn_train_loader and self.real_loader; paths and batch
    size come from the module-level `cfg`.
    """
    print('=' * 50)
    print('Loading data...')
    transform = transforms.Compose([
        # BUG FIX: torchvision.transforms has no `ImageOps` attribute
        # (ImageOps lives in PIL), so the original
        # `transforms.ImageOps.grayscale` raised AttributeError. Use
        # Grayscale with 3 output channels so the 3-channel Normalize
        # below still applies.
        transforms.Grayscale(num_output_channels=3),
        transforms.Scale((cfg.img_width, cfg.img_height)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    syn_train_folder = torchvision.datasets.ImageFolder(root=cfg.syn_path, transform=transform)
    self.syn_train_loader = Data.DataLoader(syn_train_folder, batch_size=cfg.batch_size, shuffle=True,
                                            pin_memory=True)
    print('syn_train_batch %d' % len(self.syn_train_loader))
    real_folder = torchvision.datasets.ImageFolder(root=cfg.real_path, transform=transform)
    self.real_loader = Data.DataLoader(real_folder, batch_size=cfg.batch_size, shuffle=True,
                                       pin_memory=True)
    print('real_batch %d' % len(self.real_loader))
spatial_dataloader.py — file source code
Project: two-stream-action-recognition
Author: jeffreyhuang1
Project source code
File source code
Views: 18
Favorites: 0
Likes: 0
Comments: 0
def train(self):
    """Build the training DataLoader over randomly-cropped spatial frames.

    Samples come from self.dic_training under self.data_path; each frame
    is randomly cropped to 224, randomly flipped and normalized with the
    standard ImageNet mean/std.
    """
    training_set = spatial_dataset(dic=self.dic_training, root_dir=self.data_path, mode='train',
                                   transform=transforms.Compose([
                                       transforms.RandomCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                                   ]))
    # BUG FIX: the original used Python 2 print statements, which are a
    # syntax error under Python 3; output is unchanged.
    print('==> Training data :', len(training_set), 'frames')
    print(training_set[1][0]['img1'].size())
    train_loader = DataLoader(
        dataset=training_set,
        batch_size=self.BATCH_SIZE,
        shuffle=True,
        num_workers=self.num_workers)
    return train_loader
spatial_dataloader.py — file source code
Project: two-stream-action-recognition
Author: jeffreyhuang1
Project source code
File source code
Views: 18
Favorites: 0
Likes: 0
Comments: 0
def validate(self):
    """Build the (un-shuffled) validation DataLoader over spatial frames.

    Frames are scaled to 224x224 and normalized with the standard
    ImageNet mean/std.
    """
    validation_set = spatial_dataset(dic=self.dic_testing, root_dir=self.data_path, mode='val',
                                     transform=transforms.Compose([
                                         transforms.Scale([224, 224]),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                                     ]))
    # BUG FIX: converted Python 2 print statements to the Python 3 print
    # function; output is unchanged.
    print('==> Validation data :', len(validation_set), 'frames')
    print(validation_set[1][1].size())
    val_loader = DataLoader(
        dataset=validation_set,
        batch_size=self.BATCH_SIZE,
        shuffle=False,
        num_workers=self.num_workers)
    return val_loader
motion_dataloader.py — file source code
Project: two-stream-action-recognition
Author: jeffreyhuang1
Project source code
File source code
Views: 22
Favorites: 0
Likes: 0
Comments: 0
def train(self):
    """Build the shuffled training DataLoader over stacked optical-flow clips.

    Clips come from self.dic_video_train with `self.in_channel` flow
    frames per sample, scaled to 224x224.
    """
    training_set = motion_dataset(dic=self.dic_video_train, in_channel=self.in_channel, root_dir=self.data_path,
                                  mode='train',
                                  transform=transforms.Compose([
                                      transforms.Scale([224, 224]),
                                      transforms.ToTensor(),
                                  ]))
    # BUG FIX: converted a Python 2 print statement to the Python 3 print
    # function; output is unchanged.
    print('==> Training data :', len(training_set), ' videos', training_set[1][0].size())
    train_loader = DataLoader(
        dataset=training_set,
        batch_size=self.BATCH_SIZE,
        shuffle=True,
        num_workers=self.num_workers,
        pin_memory=True
    )
    return train_loader
motion_dataloader.py — file source code
Project: two-stream-action-recognition
Author: jeffreyhuang1
Project source code
File source code
Views: 20
Favorites: 0
Likes: 0
Comments: 0
def val(self):
    """Build the (un-shuffled) validation DataLoader over optical-flow clips."""
    validation_set = motion_dataset(dic=self.dic_test_idx, in_channel=self.in_channel, root_dir=self.data_path,
                                    mode='val',
                                    transform=transforms.Compose([
                                        transforms.Scale([224, 224]),
                                        transforms.ToTensor(),
                                    ]))
    # BUG FIX: converted a Python 2 print statement to the Python 3 print
    # function; output is unchanged.
    print('==> Validation data :', len(validation_set), ' frames', validation_set[1][1].size())
    val_loader = DataLoader(
        dataset=validation_set,
        batch_size=self.BATCH_SIZE,
        shuffle=False,
        num_workers=self.num_workers)
    return val_loader
def test_sequential_batch(self):
    """Batched sequential iteration preserves order and dict structure."""
    loader = DataLoader(self.dataset, batch_size=2, shuffle=False)
    bs = loader.batch_size
    for batch_idx, sample in enumerate(loader):
        base = batch_idx * bs
        self.assertEqual(set(sample.keys()), {'a_tensor', 'another_dict'})
        self.assertEqual(set(sample['another_dict'].keys()), {'a_number'})
        tensor = sample['a_tensor']
        self.assertEqual(tensor.size(), torch.Size([bs, 4, 2]))
        self.assertTrue((tensor[0] == base).all())
        self.assertTrue((tensor[1] == base + 1).all())
        number = sample['another_dict']['a_number']
        self.assertEqual(number.size(), torch.Size([bs]))
        self.assertEqual(number[0], base)
        self.assertEqual(number[1], base + 1)
def _test_batch_sampler(self, **kwargs):
    """DataLoader honours an explicit batch_sampler of uneven batch sizes.

    Batches alternate between a pair and a triple of consecutive indices:
    (0, 1), (2, 3, 4), (5, 6), (7, 8, 9), ...
    """
    batches = []
    for start in range(0, 100, 5):
        batches.append(tuple(range(start, start + 2)))
        batches.append(tuple(range(start + 2, start + 5)))
    dl = DataLoader(self.dataset, batch_sampler=batches, **kwargs)
    self.assertEqual(len(dl), 40)
    for i, (input, _target) in enumerate(dl):
        offset = i * 5 // 2
        expected_len = 2 if i % 2 == 0 else 3
        self.assertEqual(len(input), expected_len)
        self.assertEqual(input, self.data[offset:offset + expected_len])
def test_sequential_batch(self):
    """Each batch of a non-shuffled loader holds consecutive samples."""
    loader = DataLoader(self.dataset, batch_size=2, shuffle=False)
    size = loader.batch_size
    for step, batch in enumerate(loader):
        first = step * size
        self.assertEqual({'a_tensor', 'another_dict'}, set(batch.keys()))
        self.assertEqual({'a_number'}, set(batch['another_dict'].keys()))
        t = batch['a_tensor']
        self.assertEqual(torch.Size([size, 4, 2]), t.size())
        for row in range(2):
            self.assertTrue((t[row] == first + row).all())
        n = batch['another_dict']['a_number']
        self.assertEqual(torch.Size([size]), n.size())
        for row in range(2):
            self.assertEqual(n[row], first + row)
def get_loaders(loader_batchsize, **kwargs):
    """Return (train_loader, test_loader) for kwargs['arguments'].data.

    Only 'mnist' is handled; both loaders shuffle, and use one worker
    with pinned memory when CUDA is requested.
    """
    arguments = kwargs['arguments']
    if arguments.data == 'mnist':
        worker_opts = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
        # Normalization is deliberately left disabled in both pipelines.
        to_tensor = transforms.Compose([
            transforms.ToTensor(),
        ])
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data', train=True, download=True,
                           transform=to_tensor),
            batch_size=loader_batchsize, shuffle=True, **worker_opts)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data', train=False, transform=to_tensor),
            batch_size=loader_batchsize, shuffle=True, **worker_opts)
        return train_loader, test_loader
def form_torch_mixture_dataset(MSabs, MSphase,
                               SPCS1abs, SPCS2abs,
                               wavfls1, wavfls2,
                               lens1, lens2,
                               arguments):
    """Pack mixture spectra, source spectra and waveforms into a DataLoader.

    Every array-like argument except lens1/lens2 is converted to a torch
    tensor; lens1/lens2 are handed to MixtureDataset untouched. Iteration
    order is preserved (shuffle=False).
    """
    def as_tensor(a):
        return torch.from_numpy(np.array(a))

    tensors = [as_tensor(a) for a in (MSabs, MSphase, SPCS1abs, SPCS2abs,
                                      wavfls1, wavfls2)]
    dataset = MixtureDataset(*tensors, lens1, lens2)
    worker_opts = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
    return data_utils.DataLoader(dataset, batch_size=arguments.batch_size,
                                 shuffle=False, **worker_opts)
def _test_batch_sampler(self, **kwargs):
    """Explicit batch_sampler with alternating 2- and 3-element batches.

    Index pattern per stride of 5: (0, 1), (2, 3, 4), (5, 6), (7, 8, 9), ...
    """
    batches = []
    start = 0
    while start < 100:
        batches.append(tuple(range(start, start + 2)))
        batches.append(tuple(range(start + 2, start + 5)))
        start += 5
    dl = DataLoader(self.dataset, batch_sampler=batches, **kwargs)
    self.assertEqual(len(dl), 40)
    pos = 0  # running offset into self.data, advanced by each batch's size
    for idx, (input, _target) in enumerate(dl):
        size = 2 if idx % 2 == 0 else 3
        self.assertEqual(len(input), size)
        self.assertEqual(input, self.data[pos:pos + size])
        pos += size
def test_sequential_batch(self):
    """Without shuffling, batch k contains samples 2k and 2k + 1."""
    loader = DataLoader(self.dataset, batch_size=2, shuffle=False)
    bs = loader.batch_size
    start = 0
    for sample in loader:
        self.assertEqual(set(sample.keys()), {'a_tensor', 'another_dict'})
        self.assertEqual(set(sample['another_dict'].keys()), {'a_number'})
        t = sample['a_tensor']
        self.assertEqual(t.size(), torch.Size([bs, 4, 2]))
        self.assertTrue((t[0] == start).all())
        self.assertTrue((t[1] == start + 1).all())
        n = sample['another_dict']['a_number']
        self.assertEqual(n.size(), torch.Size([bs]))
        self.assertEqual(n[0], start)
        self.assertEqual(n[1], start + 1)
        start += bs
def check_data(self, _, y):
    """Validate the target array before fitting.

    Raises ValueError when y is missing while the stock DataLoader is in
    use, or when y is 1-dimensional (the pytorch DataLoader would upcast
    1-dim float targets to DoubleTensor). Returns None when y is absent
    but a custom iterator supplies targets.
    """
    if y is None:
        if self.iterator_train is DataLoader:
            raise ValueError("No y-values are given (y=None). You must "
                             "implement your own DataLoader for training "
                             "(and your validation) and supply it using the "
                             "``iterator_train`` and ``iterator_valid`` "
                             "parameters respectively.")
        # The user implements their own mechanism for generating y.
        return
    if get_dim(y) == 1:
        raise ValueError("The target data shouldn't be 1-dimensional; "
                         "please reshape (e.g. y.reshape(-1, 1).")
# pylint: disable=signature-differs
def check_aug():
    """Visually inspect augmentation on batch index 3 of fold-0 training data.

    Prints basic statistics for that sample and shows its first 13 image
    planes in a 3x5 grid.
    """
    nfold = 0
    tst_dataset = CSVDataset_tst(f'../../_data/fold{nfold}/train.csv')
    loader = data.DataLoader(tst_dataset, batch_size=1, shuffle=False, num_workers=8)
    for batch_idx, batch in enumerate(loader):
        if batch_idx == 3:
            inputs, labels = batch
            inputs, labels = inputs.numpy()[0], labels.numpy()[0]
            print(inputs.shape, labels.shape, np.amax(inputs), np.amin(inputs), np.mean(inputs))
            for plane in range(13):
                plt.subplot(3, 5, 1 + plane)
                plt.imshow(np.transpose(inputs[plane], (1, 2, 0)))
            break
    plt.show()
def check_aug():
    """Sanity-check augmentations: plot one fixed batch (index 3) from fold 0."""
    nfold = 0
    tst_dataset = CSVDataset_tst(f'../../_data/fold{nfold}/train.csv')
    tst = data.DataLoader(tst_dataset, batch_size=1, shuffle=False, num_workers=8)
    for j, val_data in enumerate(tst):
        if j != 3:
            continue
        inputs, labels = val_data
        inputs, labels = inputs.numpy()[0], labels.numpy()[0]
        print(inputs.shape, labels.shape, np.amax(inputs), np.amin(inputs), np.mean(inputs))
        for i in range(13):
            plt.subplot(3, 5, i + 1)
            plt.imshow(np.transpose(inputs[i], (1, 2, 0)))
        break
    plt.show()
def validate(args):
    """Evaluate a saved segmentation model on the requested split.

    Loads the dataset named by args.dataset, restores weights from
    args.model_path (the model architecture name is the prefix of the
    checkpoint file name), runs inference over the split, and prints the
    aggregate metrics plus per-class IoU.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True, img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size, num_workers=4)
    running_metrics = runningScore(n_classes)
    # Setup Model
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()
    # PERF FIX: the original called model.cuda() on every loop iteration;
    # moving the model to the GPU once before the loop is sufficient.
    model.cuda()
    for i, (images, labels) in tqdm(enumerate(valloader)):
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)
        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()
        running_metrics.update(gt, pred)
    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, class_iou[i])
def train(args):
    """Train a segmentation model (args.arch) on args.dataset with SGD.

    Builds the data loader, optionally wires up visdom loss plotting,
    wraps the model in DataParallel across all visible GPUs, then runs
    the training loop and saves the full model after every epoch.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True, img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    trainloader = data.DataLoader(loader, batch_size=args.batch_size, num_workers=4, shuffle=True)
    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()
        # Seed the plot with a single zero point; later calls append to it.
        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss',
                                         title='Training Loss',
                                         legend=['Loss']))
    # Setup Model
    model = get_model(args.arch, n_classes)
    # Spread each batch across every visible GPU.
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4)
    for epoch in range(args.n_epoch):
        for i, (images, labels) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            optimizer.zero_grad()
            outputs = model(images)
            loss = cross_entropy2d(outputs, labels)
            loss.backward()
            optimizer.step()
            if args.visdom:
                # Append this minibatch's loss to the visdom line plot.
                vis.line(
                    X=torch.ones((1, 1)).cpu() * i,
                    Y=torch.Tensor([loss.data[0]]).unsqueeze(0).cpu(),
                    win=loss_window,
                    update='append')
            if (i+1) % 20 == 0:
                print("Epoch [%d/%d] Loss: %.4f" % (epoch+1, args.n_epoch, loss.data[0]))
        # Checkpoint the whole model object (not just the state dict) each epoch.
        torch.save(model, "{}_{}_{}_{}.pkl".format(args.arch, args.dataset, args.feature_scale, epoch))
def CreateDataLoader(opt):
    """Build a shuffled, drop-last DataLoader over paired colour/sketch folders.

    Three pipelines feed ImageFolder: full-size colour (transform), a
    quarter-size random crop (vtransform) and full-size sketch
    (stransform); all three map pixel values into [-1, 1].
    """
    random.seed(opt.manualSeed)
    # Normalize is stateless, so one instance is shared by every pipeline.
    to_signed_unit = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        to_signed_unit,
    ])
    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize // 4, Image.BICUBIC),
        transforms.ToTensor(),
        to_signed_unit,
    ])
    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        to_signed_unit,
    ])
    dataset = ImageFolder(rootC=opt.datarootC, rootS=opt.datarootS,
                          transform=CTrans, vtransform=VTrans, stransform=STrans)
    assert dataset
    return data.DataLoader(dataset, batch_size=opt.batchSize, shuffle=True,
                           num_workers=int(opt.workers), drop_last=True)
def CreateDataLoader(opt):
    """Return a shuffled, drop-last DataLoader over the paired
    colour/sketch ImageFolder dataset; all pipelines normalize to [-1, 1]."""
    random.seed(opt.manualSeed)

    def pipeline(size_transform):
        # Shared tail: convert to tensor, then scale pixels into [-1, 1].
        return transforms.Compose([
            size_transform,
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    CTrans = pipeline(transforms.Scale(opt.imageSize, Image.BICUBIC))
    VTrans = pipeline(RandomSizedCrop(opt.imageSize // 4, Image.BICUBIC))
    STrans = pipeline(transforms.Scale(opt.imageSize, Image.BICUBIC))
    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans)
    assert dataset
    return data.DataLoader(dataset, batch_size=opt.batchSize, shuffle=True,
                           num_workers=int(opt.workers), drop_last=True)
def CreateDataLoader(opt):
    """DataLoader over colour/sketch pairs.

    The colour and sketch pipelines normalize to [-1, 1]; vtransform
    instead crops to 224 and normalizes with the standard ImageNet
    mean/std constants.
    """
    random.seed(opt.manualSeed)
    half_half = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        half_half,
    ])
    VTrans = transforms.Compose([
        RandomSizedCrop(224, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        half_half,
    ])
    dataset = ImageFolder(rootC=opt.datarootC, rootS=opt.datarootS,
                          transform=CTrans, vtransform=VTrans, stransform=STrans)
    assert dataset
    return data.DataLoader(dataset, batch_size=opt.batchSize, shuffle=True,
                           num_workers=int(opt.workers), drop_last=True)
def CreateDataLoader(opt):
    """DataLoader over colour/sketch pairs; here vtransform random-crops
    at the full opt.imageSize, and every pipeline normalizes to [-1, 1]."""
    random.seed(opt.manualSeed)

    def normalized(head):
        return transforms.Compose([
            head,
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    dataset = ImageFolder(
        rootC=opt.datarootC,
        rootS=opt.datarootS,
        transform=normalized(transforms.Scale(opt.imageSize, Image.BICUBIC)),
        vtransform=normalized(RandomSizedCrop(opt.imageSize, Image.BICUBIC)),
        stransform=normalized(transforms.Scale(opt.imageSize, Image.BICUBIC)),
    )
    assert dataset
    return data.DataLoader(dataset, batch_size=opt.batchSize, shuffle=True,
                           num_workers=int(opt.workers), drop_last=True)
def CreateDataLoader(opt):
    """DataLoader over colour/sketch pairs; the sketch branch additionally
    applies a random fade (x*r + 1 - r with r ~ U(0.7, 1)) before the
    final normalization to [-1, 1]."""
    random.seed(opt.manualSeed)
    to_signed_unit = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        to_signed_unit,
    ])
    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize // 4, Image.BICUBIC),
        transforms.ToTensor(),
        to_signed_unit,
    ])

    def jitter(x):
        # Random fade toward white: r drawn fresh per image.
        ran = random.uniform(0.7, 1)
        return x * ran + 1 - ran

    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Lambda(jitter),
        to_signed_unit,
    ])
    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans)
    assert dataset
    return data.DataLoader(dataset, batch_size=opt.batchSize, shuffle=True,
                           num_workers=int(opt.workers), drop_last=True)
def get_loader(chunk_list):
    """Concatenate HDF5 chunks into one TensorDataset and wrap it in a loader.

    Each file in chunk_list must contain 'data' and 'label' arrays; both
    are stacked along axis 0 and served in shuffled batches of 256.
    """
    data_parts, label_parts = [], []
    for fname in chunk_list:
        print('Loading data from %s' % fname)
        with h5py.File(fname, 'r') as hf:
            data_parts.append(np.asarray(hf['data']))
            label_parts.append(np.asarray(hf['label']))
    data = torch.FloatTensor(np.concatenate(data_parts, axis=0))
    label = torch.FloatTensor(np.concatenate(label_parts, axis=0))
    print('Total %d frames loaded' % data.size(0))
    dset_train = TensorDataset(data, label)
    return DataLoader(dset_train, batch_size=256, shuffle=True,
                      num_workers=10, pin_memory=False)
def train(model, db, args, bsz=32, eph=1, use_cuda=False):
    """Train `model` on dataset `db` with SGD and cross-entropy loss.

    Args:
        model: network mapping (N, 1, ...) float inputs to class logits.
        db: Dataset yielding (input, one-hot target) pairs.
        args: unused in this function body.
        bsz: minibatch size.
        eph: number of epochs.
        use_cuda: move tensors to GPU when True and the module-level
            `cuda_ava` flag confirms CUDA availability.

    Side effects: prints the running loss every 100 steps and saves
    checkpoints into ./saved_model whenever the last batch loss improves,
    plus one final checkpoint after training.
    """
    print("Training...")
    trainloader = data_utils.DataLoader(dataset=db, batch_size=bsz, shuffle=True)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
    best_loss = 100000  # sentinel "infinity" for the best batch loss seen so far
    for epoch in range(eph):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            # Add the singleton channel dimension the model expects.
            inputs = inputs.unsqueeze(1)
            # CrossEntropyLoss needs class indices, not one-hot vectors.
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0
            if last_loss < best_loss:
                best_loss = last_loss
                # NOTE(review): evaluating over the full training loader on
                # every improved batch loss is expensive — confirm intended.
                acc = evaluate(model, trainloader, use_cuda)
                torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    # Final checkpoint with accuracy measured on the training loader.
    acc = evaluate(model, trainloader, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_all_acc_{}.t7'.format(acc)))
    print("Finished Training!")
def test_sequential(self):
    """Unbatched sequential iteration matches the dataset order."""
    loader = DataLoader(self.dataset)
    self._test_sequential(loader)
def test_sequential_batch(self):
    """Batched (size 2) sequential iteration matches the dataset order."""
    batched = DataLoader(self.dataset, batch_size=2)
    self._test_sequential(batched)