def get_cifar10(save_dir=None, root_path=None):
    """Load the CIFAR-10 python batches, downloading them first if needed.

    Exactly one of the two arguments must be given:

    save_dir:  directory into which ``cifar-10-python.tar.gz`` is downloaded
               and extracted; the batches are then read from
               ``<save_dir>/cifar-10-batches-py``.
    root_path: directory that already holds the extracted batch files
               (``data_batch_1`` .. ``data_batch_5`` and ``test_batch``);
               nothing is downloaded in this case.

    Returns the tuple ``(Xtr, Ytr, Xte, Yte)``. ``Xtr``/``Ytr`` are the five
    training batches joined with ``np.concatenate``; ``Xte``/``Yte`` are
    whatever ``load_CIFAR_batch`` returns for ``test_batch``.

    Raises AssertionError if both arguments are None or both are given.
    """
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`; the exception type is unchanged for existing callers.
    if (save_dir is None) == (root_path is None):
        raise AssertionError(
            "exactly one of save_dir and root_path must be given")

    if root_path is None:
        print('Downloading CIFAR10 dataset...')
        tar_path = os.path.join(save_dir, "cifar-10-python.tar.gz")
        # urllib.URLopener is the Python 2 API (it raises on HTTP errors);
        # it relies on the module-level `import urllib`.
        url = urllib.URLopener()
        url.retrieve(
            "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz",
            tar_path)
        print('Download Done, Extracting...')
        tar = tarfile.open(tar_path)
        try:
            tar.extractall(save_dir)
        finally:
            # Release the archive handle even when extraction fails.
            tar.close()
        root = os.path.join(save_dir, "cifar-10-batches-py")
    else:
        # Tested with `is None`, like the validation above, so an empty
        # string is honoured as a (relative) path rather than falling back
        # to a save_dir that is None.
        root = root_path

    # Training Data
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(root, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    # Single-argument print with %-formatting emits the same text as the
    # former `print 'label', value` statements on both Python 2 and 3.
    print('Xtrain shape %s' % (Xtr.shape,))
    print('Ytrain shape %s' % (Ytr.shape,))

    # Testing data
    Xte, Yte = load_CIFAR_batch(os.path.join(root, 'test_batch'))
    print('Xtest shape %s' % (Xte.shape,))
    print('Ytest shape %s' % (Yte.shape,))
    return Xtr, Ytr, Xte, Yte
评论列表
文章目录