Example source code using the Python class DataLoader()

test_dataloader.py — project: pytorch-dist, author: apaszke
def test_shuffle(self):
        self._test_shuffle(DataLoader(self.dataset, shuffle=True))
test_dataloader.py — project: pytorch-dist, author: apaszke
def test_shuffle_batch(self):
        self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True))
test_dataloader.py — project: pytorch-dist, author: apaszke
def test_sequential_workers(self):
        self._test_sequential(DataLoader(self.dataset, num_workers=4))
test_dataloader.py — project: pytorch-dist, author: apaszke
def test_shuffle_workers(self):
        self._test_shuffle(DataLoader(self.dataset, shuffle=True, num_workers=4))
test_dataloader.py — project: pytorch-dist, author: apaszke
def test_shuffle_batch_workers(self):
        self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4))
test_dataloader.py — project: pytorch-dist, author: apaszke
def test_error(self):
        self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
test_dataloader.py — project: pytorch-dist, author: apaszke
def test_error_workers(self):
        self._test_error(DataLoader(ErrorDataset(41), batch_size=2, shuffle=True, num_workers=4))
test_dataloader.py — project: pytorch-dist, author: apaszke
def test_partial_workers(self):
        "check that workers exit even if the iterator is not exhausted"
        loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4))
        workers = loader.workers
        for i, sample in enumerate(loader):
            if i == 3:
                break
        del loader
        for w in workers:
            w.join(1.0)  # timeout of one second
            self.assertFalse(w.is_alive(), 'subprocess not terminated')
            self.assertEqual(w.exitcode, 0)
dataset.py — project: tnt, author: pytorch
def parallel(self, *args, **kwargs):
        return DataLoader(self, *args, **kwargs)
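As a hedged usage sketch (ListDataset and its load argument are taken from torchnet's dataset API and are assumptions here, not part of this snippet), parallel() simply forwards its arguments to torch's DataLoader:

import torchnet as tnt

# Wrap a list of ids and load each lazily; parallel() hands the dataset
# and every argument straight to torch.utils.data.DataLoader.
dataset = tnt.dataset.ListDataset(list(range(100)), load=lambda i: {'x': i})
loader = dataset.parallel(batch_size=8, shuffle=True, num_workers=2)
for batch in loader:
    pass  # each batch is built by DataLoader's default collation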
find_best_threthold.py — project: kaggle-planet, author: ZijunDeng
def main():
    training_batch_size = 352
    validation_batch_size = 352

    net = get_res152(num_classes=num_classes, snapshot_path=os.path.join(
        ckpt_path, 'epoch_15_validation_loss_0.0772_iter_1000.pth')).cuda()
    net.eval()

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.311, 0.340, 0.299], [0.167, 0.144, 0.138])
    ])
    criterion = nn.MultiLabelSoftMarginLoss().cuda()

    train_set = MultipleClassImageFolder(split_train_dir, transform)
    train_loader = DataLoader(train_set, batch_size=training_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, train_loader)
    loss = criterion(batch_outputs, batch_labels)
    print('training loss %.4f' % loss.cpu().data.numpy()[0])
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    thretholds = find_best_threthold(batch_outputs, batch_labels)

    val_set = MultipleClassImageFolder(split_val_dir, transform)
    val_loader = DataLoader(val_set, batch_size=validation_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, val_loader)
    loss = criterion(batch_outputs, batch_labels)
    print('validation loss %.4f' % loss.cpu().data.numpy()[0])
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    sio.savemat('./val_output.mat', {'outputs': batch_outputs, 'labels': batch_labels})
    prediction = get_one_hot_prediction(batch_outputs, thretholds)
    evaluation = evaluate(prediction, batch_labels)
    print('validation evaluation: accuracy %.4f, precision %.4f, recall %.4f, f2 %.4f' % (
        evaluation[0], evaluation[1], evaluation[2], evaluation[3]))
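The helper find_best_threthold (the project's own spelling) is defined elsewhere in the same file; a minimal sketch of the usual per-class approach, assuming scikit-learn's fbeta_score and a hypothetical sweep range, could look like this:

import numpy as np
from sklearn.metrics import fbeta_score

def find_best_threthold(outputs, labels):
    # For each class independently, sweep candidate thresholds and keep
    # the one that maximizes the F2 score of the binary predictions.
    thresholds = np.full(labels.shape[1], 0.5)
    for c in range(labels.shape[1]):
        best_f2 = 0.0
        for t in np.arange(0.05, 0.95, 0.01):
            pred = (outputs[:, c] > t).astype(int)
            f2 = fbeta_score(labels[:, c], pred, beta=2)
            if f2 > best_f2:
                best_f2, thresholds[c] = f2, t
    return thresholds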
test_datasets.py — project: nnmnkwii, author: r9y9
def test_sequence_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    X, Y = _get_small_datasets(padded=False)

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == len(y.shape)
            assert len(x.shape) == 3
            print(idx, x.shape, y.shape)

    # Test with batch_size = 1
    yield __test, X, Y, 1
    # Since the sequences have variable lengths, a batch size larger than 1
    # makes default collation fail with a RuntimeError (one workaround is a
    # padding collate_fn; see the sketch after this snippet).
    yield raises(RuntimeError)(__test), X, Y, 2

    # For a padded dataset, which can be represented as (N, T^max, D), the
    # batch size can be any number.
    X, Y = _get_small_datasets(padded=True)
    yield __test, X, Y, 1
    yield __test, X, Y, 2
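One common workaround for the variable-length case, sketched here assuming a PyTorch recent enough to ship torch.nn.utils.rnn.pad_sequence, is a custom collate_fn that zero-pads every sequence in the batch to the longest one:

import torch
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # batch is a list of (x, y) pairs whose first dimensions differ;
    # zero-pad both sides so they stack into (B, T_max, D) tensors.
    xs, ys = zip(*batch)
    return pad_sequence(xs, batch_first=True), pad_sequence(ys, batch_first=True)

# loader = data_utils.DataLoader(dataset, batch_size=2, collate_fn=pad_collate)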
test_datasets.py — project: nnmnkwii, author: r9y9
def test_frame_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    X, Y = _get_small_datasets(padded=False)

    # Since torch's Dataset (and Chainer's, and maybe others') assumes the
    # dataset has a fixed length, i.e., implements the `__len__` method, we
    # need to know the number of frames for each utterance. The sum of the
    # frame counts is the dataset size for frame-wise iteration.
    lengths = np.array([len(x) for x in X], dtype=int)

    # For the above reason, we need to explicitly give the number of frames.
    X = MemoryCacheFramewiseDataset(X, lengths, cache_size=len(X))
    Y = MemoryCacheFramewiseDataset(Y, lengths, cache_size=len(Y))

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == 2
            assert len(y.shape) == 2

    yield __test, X, Y, 128
    yield __test, X, Y, 256
callbacks.py — project: KagglePlanetPytorch, author: Mctigger
def __init__(self, trainer, dataset, start_epoch=0, momentum=0, batch_size=96):
        super(SemiSupervisedUpdater, self).__init__()

        self.trainer = trainer
        self.dataset = dataset
        self.start_epoch = start_epoch
        self.loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=10, pin_memory=True)
        self.momentum = momentum
predict.py — project: KagglePlanetPytorch, author: Mctigger
def get_test_loader(test_images, transformations):
    dset_test = KaggleAmazonTestDataset(test_images, paths.test_jpg, '.jpg', transformations, divide=False)
    loader_val = DataLoader(dset_test,
                            batch_size=batch_size,
                            num_workers=12,
                            pin_memory=True)
    return loader_val
pytorch_data_teacher.py — project: ParlAI, author: facebookresearch
def __init__(self, opt, shared=None):
        opt['batch_sort'] = False
        super().__init__(opt, shared)
        self.use_batch_act = self.bsz > 1
        self.num_workers = opt['numworkers']
        # One can specify a collate function to use for preparing a batch
        collate_fn = opt.get('collate_fn', default_collate)
        if not shared:
            self.dataset = StreamDataset(opt)
            self.pytorch_dataloader = DataLoader(
                self.dataset,
                batch_size=self.bsz,
                shuffle=False,
                sampler=sampler.SequentialSampler(self.dataset),
                num_workers=self.num_workers,
                collate_fn=collate_fn,
                pin_memory=False,
                drop_last=False,
                )
            self.lastYs = [None] * self.bsz
        else:
            self.dataset = shared['dataset']
            self.pytorch_dataloader = shared['pytorch_dataloader']
            self.lastYs = shared['lastYs']

        self.num_batches = math.ceil(self.dataset.num_examples()/self.bsz)
        self.reset()
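The collate_fn read from opt above receives the list of samples fetched by the workers and merges them into one batch; a minimal custom function (a sketch, not ParlAI's actual behavior) might skip default_collate's tensor stacking entirely:

def episode_collate(samples):
    # Hand the batch through as a plain list of examples instead of
    # letting default_collate stack fields into tensors.
    return list(samples)

# opt['collate_fn'] = episode_collate  # picked up by opt.get('collate_fn', ...)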
mnist_cached.py — project: pyro, author: uber
def setup_data_loaders(dataset, use_cuda, batch_size, sup_num=None, root='./data', download=True, **kwargs):
    """
        helper function for setting up pytorch data loaders for a semi-supervised dataset
    :param dataset: the data to use
    :param use_cuda: use GPU(s) for training
    :param batch_size: size of a batch of data to output when iterating over the data loaders
    :param sup_num: number of supervised data examples
    :param root: where on the filesystem should the dataset be
    :param download: download the dataset (if it doesn't exist already)
    :param kwargs: other params for the pytorch data loader
    :return: three data loaders: (supervised data for training, un-supervised data for training,
                                  supervised data for testing)
    """
    # instantiate the dataset as training/testing sets
    if 'num_workers' not in kwargs:
        kwargs = {'num_workers': 0, 'pin_memory': False}

    cached_data = {}
    loaders = {}
    for mode in ["unsup", "test", "sup", "valid"]:
        if sup_num is None and mode == "sup":
            # in this special case, we do not want "sup" and "valid" data loaders
            return loaders["unsup"], loaders["test"]
        cached_data[mode] = dataset(root=root, mode=mode, download=download,
                                    sup_num=sup_num, use_cuda=use_cuda)
        loaders[mode] = DataLoader(cached_data[mode], batch_size=batch_size, shuffle=True, **kwargs)

    return loaders
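A hedged usage sketch (MNISTCached is the dataset class from the same pyro example and is an assumption here):

# With sup_num given, all four loaders come back in a dict:
loaders = setup_data_loaders(MNISTCached, use_cuda=True, batch_size=128,
                             sup_num=3000)
xs, ys = next(iter(loaders["sup"]))

# With sup_num=None, only the unsupervised and test loaders are returned:
unsup_loader, test_loader = setup_data_loaders(MNISTCached, use_cuda=True,
                                               batch_size=128)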
basic.py — project: inferno, author: inferno-pytorch
def train_loader(self, value):
        assert isinstance(value, DataLoader)
        self._loaders.update({'train': value})
basic.py — project: inferno, author: inferno-pytorch
def validate_loader(self, value):
        assert isinstance(value, DataLoader)
        self._loaders.update({'validate': value})
data_loader.py — project: pytorch-tutorial, author: yunjey
def get_loader(image_path, image_size, batch_size, num_workers=2):
    """Builds and returns Dataloader."""

    transform = transforms.Compose([
                    transforms.Scale(image_size),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    dataset = ImageFolder(image_path, transform)
    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    return data_loader
data_loader.py — project: LSGAN.pytorch, author: meliketoy
def get_loader(image_path, image_size, batch_size, transform, num_workers=2):
    dataset = ImageFolder(image_path, transform)
    # The original assigned to a misspelled `data_laoder` and then returned
    # the undefined name `data_loader`; the name is fixed here.
    data_loader = data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers
    )

    return data_loader
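A hedged usage sketch; note that image_size is accepted but never used in the body, so any resizing has to happen inside the transform (transforms.Scale matches the torchvision era of the other snippets here; newer releases call it Resize, and the image directory below is hypothetical):

transform = transforms.Compose([
    transforms.Scale(64),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
loader = get_loader('./data/img', image_size=64, batch_size=128,
                    transform=transform)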

