def load_mnist():
    """Load MNIST, snake-flatten each image into a 1D sequence, and
    return a train/test split.

    Returns:
        train_X, test_X: float32 arrays of shape (n_samples, 784, 1)
        train_y, test_y: int64 label arrays
    """
    # fetch_mldata was removed in scikit-learn 0.22 (mldata.org is defunct);
    # fetch_openml serves the identical dataset under the name 'mnist_784'.
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    # NOTE(review): `seed` is a module-level global defined outside this view.
    mnist_X, mnist_y = shuffle(mnist.data, mnist.target, random_state=seed)
    mnist_X = mnist_X / 255.0
    # Cast to the dtypes PyTorch expects: float32 inputs, int64 targets.
    # (fetch_openml returns string labels, hence the int64 cast on y.)
    mnist_X, mnist_y = mnist_X.astype('float32'), mnist_y.astype('int64')

    def flatten_img(images):
        '''
        Flatten 2D images into 1D sequences in boustrophedon (snake) order:
        every odd-numbered row is reversed so that consecutive sequence
        elements remain spatially adjacent in the original image.

        images: shape => (n, rows, columns)
        output: shape => (n, rows*columns)
        '''
        n_rows = images.shape[1]
        n_columns = images.shape[2]
        for num in range(n_rows):
            if num % 2 != 0:
                images[:, num, :] = images[:, num, :][:, ::-1]
        output = images.reshape(-1, n_rows * n_columns)
        return output

    mnist_X = mnist_X.reshape(-1, 28, 28)
    mnist_X = flatten_img(mnist_X)       # X.shape => (n_samples, seq_len)
    mnist_X = mnist_X[:, :, np.newaxis]  # X.shape => (n_samples, seq_len, n_features)
    # Split into train and test sets.
    train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                        test_size=0.2,
                                                        random_state=seed)
    return train_X, test_X, train_y, test_y
# --- Example source snippets for Python's sklearn fetch_mldata() ---
def load_data_target(name):
    """
    Loads data and target given the name of the dataset.

    Supported names: "Boston", "Housing", "digits", "Climate Model Crashes".

    Returns:
        (data, target): feature matrix and target vector.

    Raises:
        ValueError: if `name` is not one of the supported datasets.
    """
    if name == "Boston":
        data = load_boston()
    elif name == "Housing":
        data = fetch_california_housing()
        # Truncate so that SVR does not slow down too much.
        dataset_size = 1000
        data["data"] = data["data"][:dataset_size]
        data["target"] = data["target"][:dataset_size]
    elif name == "digits":
        data = load_digits()
    elif name == "Climate Model Crashes":
        try:
            data = fetch_mldata("climate-model-simulation-crashes")
        except HTTPError:
            # mldata.org is unreliable; fall back to the UCI mirror.
            url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00252/pop_failures.dat"
            # urlopen().read() returns bytes in Python 3: decode before
            # splitting.  Drop the header line, and skip blank lines so a
            # trailing newline does not crash float('').
            lines = urlopen(url).read().decode('ascii').split('\n')[1:]
            rows = [[float(v) for v in d.split()] for d in lines if d.strip()]
            samples = np.array(rows)
            data = dict()
            data["data"] = samples[:, :-1]
            # np.int was removed in NumPy 1.24; the builtin int is equivalent.
            data["target"] = np.array(samples[:, -1], dtype=int)
    else:
        raise ValueError("dataset not supported.")
    return data["data"], data["target"]
# Source file: uci_loader.py
# Project: highdimensional-decision-boundary-plot
# Author: tmadl
# (site stats: 20 views, 0 stars, 0 likes, 0 comments)
def getdataset(datasetname, onehot_encode_strings=True):
    """Fetch a UCI/mldata dataset and return (X, y) ready for sklearn.

    Heuristically repairs datasets whose data/target fields are mixed up,
    optionally one-hot encodes string-valued columns, densifies sparse
    matrices, and maps the target to monotonically increasing ints.

    NOTE(review): relies on module-level helpers `dshape`, `unpack` and
    `tonumeric` defined elsewhere in this file; their exact semantics are
    assumed from usage (reshape to 2D, extract a scalar, map to numeric
    codes) -- confirm against their definitions.
    """
    # load -- fetch_mldata was removed in scikit-learn 0.22; this call only
    # works against old sklearn versions (NOTE(review): consider fetch_openml).
    dataset = fetch_mldata(datasetname)
    # get X and y
    X = dshape(dataset.data)
    try:
        target = dshape(dataset.target)
    except:
        # No explicit target field: fall back to the last column of X.
        print("WARNING: No target found. Taking last column of data matrix as target")
        target = X[:, -1]
        X = X[:, :-1]
    # some mldata sets are mixed up: if "target" is wider than "data",
    # the two fields were swapped at the source -- swap them back.
    if len(target.shape) > 1 and target.shape[1] > X.shape[1]:
        X = target
        target = dshape(dataset.data)
    if len(X.shape) == 1 or X.shape[1] <= 1:
        # X is degenerate (a single column): try to widen it with any other
        # array-valued fields of matching length stored in the bunch.
        # NOTE(review): comparing len(dataset[k]) to X.shape[1] (columns)
        # rather than X.shape[0] (rows) looks suspicious -- verify intent.
        for k in dataset.keys():
            if k != 'data' and k != 'target' and len(dataset[k]) == X.shape[1]:
                X = np.hstack((X, dshape(dataset[k])))
    # one-hot for categorical values
    if onehot_encode_strings:
        # Columns whose first entry is a string are treated as categorical.
        cat_ft = [i for i in range(X.shape[1]) if 'str' in str(
            type(unpack(X[0, i]))) or 'unicode' in str(type(unpack(X[0, i])))]
        if len(cat_ft):
            for i in cat_ft:
                X[:, i] = tonumeric(X[:, i])
            # NOTE(review): OneHotEncoder's `categorical_features` argument
            # was removed in scikit-learn 0.22 -- this requires an old sklearn.
            X = OneHotEncoder(categorical_features=cat_ft).fit_transform(X)
    # if sparse, make dense
    try:
        X = X.toarray()
    except:
        pass
    # convert y to monotonically increasing ints
    y = tonumeric(target).astype(int)
    return np.nan_to_num(X.astype(float)), y
def train(self, epochs, batch_size=128, save_interval=50):
    """Adversarially train the GAN on MNIST.

    Each epoch trains the discriminator on one half-batch of real images and
    one half-batch of generated images, then trains the generator (through
    the combined model, with the discriminator frozen) on a full batch.

    Args:
        epochs: number of training iterations.
        batch_size: generator batch size; the discriminator sees half.
        save_interval: save sample images every this many epochs.
    """
    # fetch_mldata was removed in scikit-learn 0.22 (mldata.org is defunct);
    # fetch_openml serves the identical dataset under the name 'mnist_784'.
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    X = mnist.data.reshape((-1,) + self.img_shape)
    y = mnist.target
    # Rescale -1 to 1
    X = (X.astype(np.float32) - 127.5) / 127.5
    half_batch = int(batch_size / 2)
    for epoch in range(epochs):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        self.discriminator.set_trainable(True)
        # Select a random half batch of real images
        idx = np.random.randint(0, X.shape[0], half_batch)
        imgs = X[idx]
        noise = np.random.normal(0, 1, (half_batch, 100))
        # Generate a half batch of images
        gen_imgs = self.generator.predict(noise)
        # Two-column one-hot labels: [1, 0] = real, [0, 1] = fake.
        valid = np.concatenate((np.ones((half_batch, 1)), np.zeros((half_batch, 1))), axis=1)
        fake = np.concatenate((np.zeros((half_batch, 1)), np.ones((half_batch, 1))), axis=1)
        # Train the discriminator and average the real/fake metrics
        d_loss_real, d_acc_real = self.discriminator.train_on_batch(imgs, valid)
        d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake)
        d_loss = 0.5 * (d_loss_real + d_loss_fake)
        d_acc = 0.5 * (d_acc_real + d_acc_fake)
        # ---------------------
        #  Train Generator
        # ---------------------
        # We only want to train the generator for the combined model
        self.discriminator.set_trainable(False)
        # Sample noise and use as generator input
        noise = np.random.normal(0, 1, (batch_size, 100))
        # The generator wants the discriminator to label the generated samples as valid
        valid = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))), axis=1)
        # Train the generator
        g_loss, g_acc = self.combined.train_on_batch(noise, valid)
        # Display the progress
        print ("%d [D loss: %f, acc: %.2f%%] [G loss: %f, acc: %.2f%%]" % (epoch, d_loss, 100*d_acc, g_loss, 100*g_acc))
        # If at save interval => save generated image samples
        if epoch % save_interval == 0:
            self.save_imgs(epoch)
# Source file: restricted_boltzmann_machine.py
# Project: ML-From-Scratch
# Author: eriklindernoren
# (site stats: 16 views, 0 stars, 0 likes, 0 comments)
def main():
    """Train an RBM on MNIST images of the digit 2 and plot the training
    error curve plus reconstructions from the first and last iteration."""
    # fetch_mldata was removed in scikit-learn 0.22 (mldata.org is defunct);
    # fetch_openml serves the identical dataset under the name 'mnist_784'.
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    X = mnist.data / 255.0
    # fetch_openml returns string labels; cast so the digit filter works.
    y = mnist.target.astype(int)
    # Select the samples of the digit 2
    X = X[y == 2]
    # Limit dataset to 500 samples
    idx = np.random.choice(range(X.shape[0]), size=500, replace=False)
    X = X[idx]
    rbm = RBM(n_hidden=50, n_iterations=200, batch_size=25, learning_rate=0.001)
    rbm.fit(X)
    # Training error plot
    training, = plt.plot(range(len(rbm.training_errors)), rbm.training_errors, label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()
    # Get the images that were reconstructed during training
    gen_imgs = rbm.training_reconstructions

    def plot_grid(images, title, filename):
        # Render 25 reconstructed digits as a 5x5 grid and save to disk.
        fig, axs = plt.subplots(5, 5)
        plt.suptitle(title)
        cnt = 0
        for i in range(5):
            for j in range(5):
                axs[i, j].imshow(images[cnt].reshape((28, 28)), cmap='gray')
                axs[i, j].axis('off')
                cnt += 1
        fig.savefig(filename)
        plt.close()

    # Reconstructions during the first and last training iteration
    plot_grid(gen_imgs[0], "Restricted Boltzmann Machine - First Iteration", "rbm_first.png")
    plot_grid(gen_imgs[-1], "Restricted Boltzmann Machine - Last Iteration", "rbm_last.png")