import os
import numpy as np
from PIL import Image
from keras.datasets import mnist

def do_split():
    # Dump MNIST to disk: labels as CSV files, images as individual PNGs.
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    os.mkdir('train')
    os.mkdir('test')
    np.savetxt('labels_train.csv', y_train, header='label')
    np.savetxt('labels_test.csv', y_test, header='label')
    for i in range(X_train.shape[0]):
        im = Image.fromarray(np.uint8(X_train[i]))
        im.save(os.path.join('train', str(i) + '.png'))
    for i in range(X_test.shape[0]):
        im = Image.fromarray(np.uint8(X_test[i]))
        im.save(os.path.join('test', str(i) + '.png'))
Python examples of load_data()
Source file: mnist_split_to_test_and_train.py
Project: kaggle_art
Author: small-yellow-duck
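A note on re-running do_split(): os.mkdir raises an error if the target directory already exists; a safer variant (a sketch, using only the standard library) is:

import os
for d in ('train', 'test'):
    os.makedirs(d, exist_ok=True)  # unlike os.mkdir, safe to call repeatedly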
def get_data():
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train.reshape(60000, 784)[:max_train_samples]
    X_test = X_test.reshape(10000, 784)[:max_test_samples]
    X_train = X_train.astype("float32") / 255
    X_test = X_test.astype("float32") / 255
    # convert class vectors to binary class matrices
    y_train = y_train[:max_train_samples]
    y_test = y_test[:max_test_samples]
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    test_ids = np.where(y_test == np.array(weighted_class))[0]
    return (X_train, Y_train), (X_test, Y_test), test_ids
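The snippet above reads several module-level globals defined elsewhere in its source file; a minimal sketch of plausible values (the names come from the snippet, the values are assumptions):

max_train_samples = 5000  # cap on training examples (assumed value)
max_test_samples = 1000   # cap on test examples (assumed value)
nb_classes = 10           # MNIST has ten digit classes
weighted_class = 9        # class whose test-set indices get_data() returns (assumed value)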
def loadmnist():
    '''
    Load the mnist data once into global variables X_mnist and y_mnist.
    '''
    from keras.datasets import mnist
    global X_mnist
    global y_mnist
    train, test = mnist.load_data()
    X_mnist = []
    y_mnist = []
    for d in [train, test]:
        X, y = d
        X = X.astype('float32')
        X /= 255.
        idx = np.argsort(y)
        X_mnist.append(X[idx, :, :])
        y_mnist.append(y[idx])
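Because loadmnist() sorts by np.argsort(y), the cached arrays come back grouped by digit; a usage sketch against the globals it fills:

loadmnist()
X_train_sorted, y_train_sorted = X_mnist[0], y_mnist[0]
print(y_train_sorted[0], y_train_sorted[-1])  # 0 first, 9 last: labels are in ascending order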
def load_mnist():
    '''
    returns mnist_data
    '''
    # input image dimensions
    img_rows, img_cols = 28, 28
    # the data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    if k.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)
    x_train = x_train.astype(k.floatx())
    x_train *= 0.96 / 255
    x_train += 0.02
    return input_shape, x_train
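The two-step scaling above maps pixels from [0, 255] into [0.02, 0.98] rather than [0, 1], keeping inputs strictly inside the open unit interval; a quick check of the endpoints:

import numpy as np
x = np.array([0.0, 255.0])
print(x * 0.96 / 255 + 0.02)  # -> [0.02 0.98]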
def get_data(n_train, n_test, nb_classes):
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    img_rows, img_cols = (28, 28)
    # group the training images by digit label (X_digits is not used further below)
    X_digits = {ind: X_train[np.where(y_train == ind)] for ind in range(10)}
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    X_train = X_train[:n_train]
    X_test = X_test[:n_test]
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')
    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train[:n_train], nb_classes)
    Y_test = np_utils.to_categorical(y_test[:n_test], nb_classes)
    return X_train, Y_train, X_test, Y_test
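Both of the loaders above one-hot encode integer labels with np_utils.to_categorical; a quick illustration of its output:

from keras.utils import np_utils
print(np_utils.to_categorical([0, 2], 3))
# [[ 1.  0.  0.]
#  [ 0.  0.  1.]]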
def get_cifar(nb_classes=10):
    # input image dimensions
    # img_rows, img_cols = 32, 32
    # # The CIFAR10 images are RGB.
    # img_channels = 3
    # The data, shuffled and split between train and test sets:
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')
    # Convert class vectors to binary class matrices.
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, Y_train, X_test, Y_test
def get_data(data_name='mnist', test_flag=False):
    if data_name == 'daudi':
        (X_train, y_train), (X_test, y_test) = daudi_load_data()
        if test_flag:
            X_train = X_test
        # inputs lie roughly in 1 +/- 0.2; rescale to [-1, 1]
        X_train = (X_train - 1.0) * 5.0
        X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
    else:
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        if test_flag:
            X_train = X_test
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5
        X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
    return X_train
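The (x - 127.5) / 127.5 transform puts pixels in [-1, 1], the range a tanh generator output produces in GAN training; for displaying generated samples, a sketch of the inverse mapping (to_uint8 is a hypothetical helper, not part of the snippet):

import numpy as np

def to_uint8(x):
    # map images from [-1, 1] back to displayable [0, 255] bytes
    return np.uint8(np.clip(x * 127.5 + 127.5, 0, 255))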
def __init___r0(self):
    """
    Load the data at half resolution (taking every other pixel in x and y),
    then zoom back up to the original size so that downstream processing
    needs no changes.
    """
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train, X_test = X_train[:, ::2, ::2], X_test[:, ::2, ::2]
    X_train_zoom = np.zeros((X_train.shape[0], X_train.shape[1] * 2, X_train.shape[2] * 2), dtype=X_train.dtype)
    X_test_zoom = np.zeros((X_test.shape[0], X_test.shape[1] * 2, X_test.shape[2] * 2), dtype=X_test.dtype)
    for i in range(X_train.shape[0]):
        X_train_zoom[i] = ndimage.interpolation.zoom(X_train[i], 2)
    for i in range(X_test.shape[0]):
        X_test_zoom[i] = ndimage.interpolation.zoom(X_test[i], 2)
    self.Org = (X_train_zoom, y_train), (X_test_zoom, y_test)
    self.Data = self.Org
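ndimage.interpolation.zoom has since been deprecated; in current SciPy the same spline interpolation is spelled ndimage.zoom, e.g.:

from scipy import ndimage
import numpy as np

img = np.arange(14 * 14, dtype=np.float32).reshape(14, 14)
print(ndimage.zoom(img, 2).shape)  # -> (28, 28), back at MNIST's native size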
def test_cifar(self):
    print('cifar10')
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
    print('cifar100 fine')
    (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
    print('cifar100 coarse')
    (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
def test_parser(prototxt, t):
    from protoflow import ProtoFlow
    from berry.layers import print_layers_summary
    from keras.datasets import mnist
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train[:, np.newaxis, ...]
    X_test = X_test[:, np.newaxis, ...]
    y_train = np.asarray([one_hot(y, 10) for y in list(y_train)])
    y_test = np.asarray([one_hot(y, 10) for y in list(y_test)])
    print(X_train.shape, y_train.shape)
    with tf.device('/gpu:2'):
        parser = ProtoFlow(prototxt, t, 100)
        model = parser.model
    print_layers_summary(model.layers)
    print([v.name for v in tf.trainable_variables()])
def show_image():
    """
    Test helper: display the first four MNIST training images as grayscale.
    """
    (X_train, y_train), (X_test, y_test) = load_data()
    # (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # plot 4 images as gray scale
    plt.subplot(221)
    plt.imshow(X_train[0], cmap=plt.get_cmap('gray'))
    plt.subplot(222)
    plt.imshow(X_train[1], cmap=plt.get_cmap('gray'))
    plt.subplot(223)
    plt.imshow(X_train[2], cmap=plt.get_cmap('gray'))
    plt.subplot(224)
    plt.imshow(X_train[3], cmap=plt.get_cmap('gray'))
    # show the plot
    plt.show()
def generate_data():
    (X_train, y_train), (X_test, y_test) = load_data()
    # flatten 28*28 images to a 784 vector for each image
    print(X_train.shape[1], X_train.shape[2], X_train.shape)
    # X_train.shape -> (60000L, 28L, 28L)
    num_pixels = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')
    # normalize inputs from 0-255 to 0-1
    X_train = X_train / 255
    X_test = X_test / 255
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)
    # print y_train.shape, y_test.shape
    # y_train.shape -> (60000L, 10L), y_test.shape -> (10000L, 10L)
    num_classes = y_test.shape[1]
    return X_train, y_train, X_test, y_test
def mnist_data():
    nb_classes = 10
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train.reshape(60000, 784)
    X_test = X_test.reshape(10000, 784)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return {
        'X_train': X_train,
        'Y_train': Y_train,
        'X_test': X_test,
        'Y_test': Y_test
    }
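A quick usage sketch for the dict-returning loader above:

data = mnist_data()
print(data['X_train'].shape, data['Y_train'].shape)  # (60000, 784) (60000, 10)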
# TODO: test with RmsProp obj and evaluate if merged defaults should do a deepcopy
Source file: example_gan_convolutional.py
Project: Deep-Learning-with-Keras
Author: PacktPublishing
def mnist_data():
    (xtrain, ytrain), (xtest, ytest) = mnist.load_data()
    return mnist_process(xtrain), mnist_process(xtest)
def load_mnist():
    """
    Loads the MNIST dataset
    :return:
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape((x_train.shape[0], -1)) / 255.0
    x_test = x_test.reshape((x_test.shape[0], -1)) / 255.0
    return np.float32(x_train), y_train, np.float32(x_test), y_test
def mnist(labels=range(10)):
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # binarize the pixels and flatten each 28x28 image to a 784-vector
    x_train = (x_train.astype('float32') / 255.).round()
    x_test = (x_test.astype('float32') / 255.).round()
    x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
    x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
    def conc(x, y):
        # prepend the label as column 0 so images and labels filter together
        return np.concatenate((y.reshape([len(y), 1]), x), axis=1)
    def select(x, y):
        # keep only rows whose label is in `labels`, then split the label column back off
        selected = np.array([elem for elem in conc(x, y) if elem[0] in labels])
        return np.delete(selected, 0, 1), np.delete(selected, np.s_[1::], 1).flatten()
    x_train, y_train = select(x_train, y_train)
    x_test, y_test = select(x_test, y_test)
    return x_train, y_train, x_test, y_test
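For example, restricting MNIST to a two-class problem with the function above (note that the labels come back as floats because of the concatenate round-trip):

x_train, y_train, x_test, y_test = mnist(labels=[0, 1])
print(np.unique(y_train))  # -> [0. 1.]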
def run_network(data=None, model=None, epochs=20, batch=256):
    try:
        start_time = time.time()
        if data is None:
            X_train, X_test, y_train, y_test = load_data()
        else:
            X_train, X_test, y_train, y_test = data
        if model is None:
            model = init_model()
        history = LossHistory()
        print('Training model...')
        model.fit(X_train, y_train, nb_epoch=epochs, batch_size=batch,
                  callbacks=[history],
                  validation_data=(X_test, y_test), verbose=2)
        print("Training duration : {0}".format(time.time() - start_time))
        score = model.evaluate(X_test, y_test, batch_size=16)
        print("Network's test score [loss, accuracy]: {0}".format(score))
        return model, history.losses
    except KeyboardInterrupt:
        print(' KeyboardInterrupt')
        return model, history.losses
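Both run_network variants rely on a LossHistory callback defined elsewhere in their source files; a minimal sketch of the usual Keras pattern (an assumption, not the projects' exact code):

import keras

class LossHistory(keras.callbacks.Callback):
    # collect the per-batch training loss, as run_network expects
    def on_train_begin(self, logs=None):
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        self.losses.append((logs or {}).get('loss'))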
def run_network(data=None, model=None, epochs=20, batch=BATCH_SIZE):
    try:
        start_time = time.time()
        if data is None:
            X_train, X_test, y_train, y_test = load_data()
        else:
            X_train, X_test, y_train, y_test = data
        if model is None:
            model = init_model()
        history = LossHistory()
        print('Training model...')
        model.fit(X_train, y_train, nb_epoch=epochs, batch_size=batch,
                  callbacks=[history],
                  validation_data=(X_test, y_test), verbose=2)
        print("Training duration : {0}".format(time.time() - start_time))
        score = model.evaluate(X_test, y_test, batch_size=16)
        print("Network's test score [loss, accuracy]: {0}".format(score))
        return model, history.losses
    except KeyboardInterrupt:
        print(' KeyboardInterrupt')
        return model, history.losses
def test_reuters():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (X_train, y_train), (X_test, y_test) = reuters.load_data()
        (X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10)