Python DataLoader() example source code

def test_shuffle(self):
    self._test_shuffle(DataLoader(self.dataset, shuffle=True))
def test_shuffle_batch(self):
    self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True))

def test_sequential_workers(self):
    self._test_sequential(DataLoader(self.dataset, num_workers=4))

def test_shuffle_workers(self):
    self._test_shuffle(DataLoader(self.dataset, shuffle=True, num_workers=4))

def test_shuffle_batch_workers(self):
    self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4))

def test_error(self):
    self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))

def test_error_workers(self):
    self._test_error(DataLoader(ErrorDataset(41), batch_size=2, shuffle=True, num_workers=4))
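# ErrorDataset is defined elsewhere in the test suite; a minimal sketch
# consistent with its use above might be (an assumption, not the original):
class ErrorDataset(torch.utils.data.Dataset):
    def __init__(self, size):
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # Fail on purpose so the tests can check that DataLoader
        # propagates errors raised inside (worker) processes.
        raise RuntimeError('intentional failure at index %d' % idx)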
def test_partial_workers(self):
    "check that workers exit even if the iterator is not exhausted"
    loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4))
    workers = loader.workers
    for i, sample in enumerate(loader):
        if i == 3:
            break
    del loader
    for w in workers:
        w.join(1.0)  # timeout of one second
        self.assertFalse(w.is_alive(), 'subprocess not terminated')
        self.assertEqual(w.exitcode, 0)
def parallel(self, *args, **kwargs):
    return DataLoader(self, *args, **kwargs)
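# Usage sketch for the parallel() helper above, assuming it is mixed into a
# Dataset subclass (names are illustrative):
#
#     loader = my_dataset.parallel(batch_size=32, shuffle=True, num_workers=4)
#     for batch in loader:
#         ...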
def main():
    training_batch_size = 352
    validation_batch_size = 352
    net = get_res152(num_classes=num_classes, snapshot_path=os.path.join(
        ckpt_path, 'epoch_15_validation_loss_0.0772_iter_1000.pth')).cuda()
    net.eval()
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.311, 0.340, 0.299], [0.167, 0.144, 0.138])
    ])
    criterion = nn.MultiLabelSoftMarginLoss().cuda()

    train_set = MultipleClassImageFolder(split_train_dir, transform)
    train_loader = DataLoader(train_set, batch_size=training_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, train_loader)
    loss = criterion(batch_outputs, batch_labels)
    print('training loss %.4f' % loss.cpu().data.numpy()[0])
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    thresholds = find_best_threthold(batch_outputs, batch_labels)

    val_set = MultipleClassImageFolder(split_val_dir, transform)
    val_loader = DataLoader(val_set, batch_size=validation_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, val_loader)
    loss = criterion(batch_outputs, batch_labels)
    print('validation loss %.4f' % loss.cpu().data.numpy()[0])
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    sio.savemat('./val_output.mat', {'outputs': batch_outputs, 'labels': batch_labels})
    prediction = get_one_hot_prediction(batch_outputs, thresholds)
    evaluation = evaluate(prediction, batch_labels)
    print('validation evaluation: accuracy %.4f, precision %.4f, recall %.4f, f2 %.4f' % (
        evaluation[0], evaluation[1], evaluation[2], evaluation[3]))
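# predict() is not shown in this snippet; a minimal sketch consistent with
# how main() uses it (PyTorch 0.x idioms; all names here are assumptions):
import torch
from torch.autograd import Variable

def predict(net, loader):
    all_outputs, all_labels = [], []
    for inputs, labels in loader:
        # volatile=True disables autograd bookkeeping for inference (0.x API)
        inputs = Variable(inputs.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)
        all_outputs.append(net(inputs))
        all_labels.append(labels)
    return torch.cat(all_outputs, 0), torch.cat(all_labels, 0)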
def test_sequence_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    X, Y = _get_small_datasets(padded=False)

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == len(y.shape)
            assert len(x.shape) == 3
            print(idx, x.shape, y.shape)

    # Test with batch_size = 1
    yield __test, X, Y, 1
    # Since we have variable-length frames, a batch size larger than 1
    # causes a runtime error.
    yield raises(RuntimeError)(__test), X, Y, 2

    # For a padded dataset, which can be represented as (N, T^max, D), the
    # batch size can be any number.
    X, Y = _get_small_datasets(padded=True)
    yield __test, X, Y, 1
    yield __test, X, Y, 2
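# One way to avoid the RuntimeError for variable-length utterances above is
# a custom collate_fn that zero-pads each batch to its longest sequence
# (a sketch, not part of the original test):
def pad_collate(batch):
    import torch
    xs, ys = zip(*batch)
    max_len = max(x.shape[0] for x in xs)

    def pad(t):
        # allocate a (max_len, D) tensor of the same type and copy t in
        padded = t.new(max_len, t.shape[1]).zero_()
        padded[:t.shape[0]] = t
        return padded

    return (torch.stack([pad(x) for x in xs]),
            torch.stack([pad(y) for y in ys]))
# e.g. data_utils.DataLoader(dataset, batch_size=2, collate_fn=pad_collate)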
def test_frame_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    X, Y = _get_small_datasets(padded=False)

    # Since torch's Dataset (and Chainer's, and maybe others) assumes a
    # dataset of fixed length, i.e. one that implements the `__len__`
    # method, we need to know the number of frames for each utterance.
    # The sum of the frame counts is the dataset size for frame-wise
    # iteration.
    lengths = np.array([len(x) for x in X], dtype=int)

    # For the above reason, we need to explicitly give the number of frames.
    X = MemoryCacheFramewiseDataset(X, lengths, cache_size=len(X))
    Y = MemoryCacheFramewiseDataset(Y, lengths, cache_size=len(Y))

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == 2
            assert len(y.shape) == 2

    yield __test, X, Y, 128
    yield __test, X, Y, 256
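# Worked example of the frame-wise sizing above (numbers are illustrative):
# three utterances of 120, 80 and 100 frames give len(dataset) == 300, so
# batch_size=128 yields ceil(300 / 128) == 3 batches per epoch, the last
# one smaller since drop_last defaults to False.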
def __init__(self, trainer, dataset, start_epoch=0, momentum=0, batch_size=96):
    super(SemiSupervisedUpdater, self).__init__()
    self.trainer = trainer
    self.dataset = dataset
    self.start_epoch = start_epoch
    self.loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                             num_workers=10, pin_memory=True)
    self.momentum = momentum
def get_test_loader(test_images, transformations):
    dset_test = KaggleAmazonTestDataset(test_images, paths.test_jpg, '.jpg',
                                        transformations, divide=False)
    loader_val = DataLoader(dset_test,
                            batch_size=batch_size,
                            num_workers=12,
                            pin_memory=True)
    return loader_val
def __init__(self, opt, shared=None):
    opt['batch_sort'] = False
    super().__init__(opt, shared)
    self.use_batch_act = self.bsz > 1
    self.num_workers = opt['numworkers']
    # One can specify a collate function to use for preparing a batch
    collate_fn = opt.get('collate_fn', default_collate)
    if not shared:
        self.dataset = StreamDataset(opt)
        self.pytorch_dataloader = DataLoader(
            self.dataset,
            batch_size=self.bsz,
            shuffle=False,
            sampler=sampler.SequentialSampler(self.dataset),
            num_workers=self.num_workers,
            collate_fn=collate_fn,
            pin_memory=False,
            drop_last=False,
        )
        self.lastYs = [None] * self.bsz
    else:
        self.dataset = shared['dataset']
        self.pytorch_dataloader = shared['pytorch_dataloader']
        self.lastYs = shared['lastYs']
    self.num_batches = math.ceil(self.dataset.num_examples() / self.bsz)
    self.reset()
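# Example of supplying a custom collate function through opt (illustrative;
# default_collate stacks tensors, this keeps the raw examples instead):
#
#     def keep_raw(batch):
#         return list(batch)
#
#     opt['collate_fn'] = keep_raw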
def setup_data_loaders(dataset, use_cuda, batch_size, sup_num=None, root='./data', download=True, **kwargs):
    """
    Helper function for setting up PyTorch data loaders for a semi-supervised dataset.

    :param dataset: the dataset class to use
    :param use_cuda: use GPU(s) for training
    :param batch_size: size of a batch of data to output when iterating over the data loaders
    :param sup_num: number of supervised data examples
    :param root: where on the filesystem the dataset should be stored
    :param download: download the dataset (if it doesn't exist already)
    :param kwargs: other params for the PyTorch data loader
    :return: a dict of data loaders keyed by mode ("unsup", "test", "sup", "valid"),
        or just the (unsupervised, test) pair of loaders when sup_num is None
    """
    # default to single-process loading without clobbering caller-supplied kwargs
    kwargs.setdefault('num_workers', 0)
    kwargs.setdefault('pin_memory', False)

    # instantiate the dataset as training/testing sets
    cached_data = {}
    loaders = {}
    for mode in ["unsup", "test", "sup", "valid"]:
        if sup_num is None and mode == "sup":
            # in this special case, we do not want "sup" and "valid" data loaders;
            # "unsup" and "test" have already been built on earlier iterations
            return loaders["unsup"], loaders["test"]
        cached_data[mode] = dataset(root=root, mode=mode, download=download,
                                    sup_num=sup_num, use_cuda=use_cuda)
        loaders[mode] = DataLoader(cached_data[mode], batch_size=batch_size,
                                   shuffle=True, **kwargs)
    return loaders
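# Usage sketch (the dataset class name is an assumption):
#
#     loaders = setup_data_loaders(MNISTCached, use_cuda=True,
#                                  batch_size=128, sup_num=3000)
#     xs, ys = next(iter(loaders["sup"]))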
def train_loader(self, value):
    assert isinstance(value, DataLoader)
    self._loaders.update({'train': value})

def validate_loader(self, value):
    assert isinstance(value, DataLoader)
    self._loaders.update({'validate': value})
def get_loader(image_path, image_size, batch_size, num_workers=2):
    """Builds and returns a DataLoader."""
    transform = transforms.Compose([
        transforms.Scale(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    dataset = ImageFolder(image_path, transform)
    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    return data_loader
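# Usage sketch (the path is illustrative):
#
#     loader = get_loader('./data/train', image_size=64, batch_size=16)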
def get_loader(image_path, image_size, batch_size, transform, num_workers=2):
    dataset = ImageFolder(image_path, transform)
    data_loader = data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers
    )
    return data_loader