def get_dataset(dataset_path='Data/Train_Data'):
    # Load the cached arrays if they exist; otherwise build them from the image folders.
    try:
        X = np.load('Data/npy_train_data/X.npy')
        Y = np.load('Data/npy_train_data/Y.npy')
    except IOError:  # cache not built yet
        labels = listdir(dataset_path)  # each subfolder name is an integer class label
        X = []
        Y = []
        for label in labels:
            datas_path = dataset_path + '/' + label
            for data in listdir(datas_path):
                img = get_img(datas_path + '/' + data)
                X.append(img)
                Y.append(int(label))
        # Create the dataset:
        X = np.array(X).astype('float32') / 255.
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, 2)
        if not os.path.exists('Data/npy_train_data/'):
            os.makedirs('Data/npy_train_data/')
        np.save('Data/npy_train_data/X.npy', X)
        np.save('Data/npy_train_data/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
    return X, X_test, Y, Y_test
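For reference, a minimal standalone sketch (labels invented here) of what the to_categorical(Y, 2) call above produces:

import numpy as np
from keras.utils import to_categorical

Y = np.array([0, 1, 1, 0], dtype='float32')  # integer labels stored as floats, as above
print(to_categorical(Y, 2))
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]
#  [1. 0.]]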
def transform(self, X, y=None):
    chars = []
    for doc in X:
        char_ids = []
        for char in doc[:self.maxlen]:
            if char in self.vocab:
                char_ids.append(self.vocab[char])
            else:
                char_ids.append(self.vocab[UNK])
        char_ids += [self.vocab[PAD]] * (self.maxlen - len(char_ids))  # padding
        chars.append(char_ids)
    chars = dense_to_one_hot(chars, len(self.vocab))
    if y is not None:
        y = [self.classes[t] for t in y]
        y = to_categorical(y, len(self.classes))
    return (chars, y) if y is not None else chars
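dense_to_one_hot is a project-local helper; a rough stand-in built on to_categorical (the name and shapes here are my assumptions, not the project's code) could look like:

import numpy as np
from keras.utils import to_categorical

def dense_to_one_hot_sketch(batch_of_ids, vocab_size):
    # One (maxlen, vocab_size) one-hot matrix per padded document.
    return np.array([to_categorical(ids, num_classes=vocab_size) for ids in batch_of_ids])

# e.g. two documents, maxlen 4, vocabulary of 5 char ids:
print(dense_to_one_hot_sketch([[1, 2, 0, 0], [3, 4, 1, 0]], 5).shape)  # (2, 4, 5)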
def onehot(data):
    '''One Hot Encoding

    WHAT: Transform a feature into binary columns.
    HOW: onehot(df.col)
    INPUT: An array, series, or list
    OUTPUT: Multiple columns of binary values that
            represent the input values.
    '''
    if type(data) == list:
        data = pd.Series(data)
    data = data.astype(int)
    data = array(data)
    encoded = to_categorical(data)
    encoded = pd.DataFrame(encoded)
    return encoded
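A hedged usage example for onehot, assuming pandas, numpy's array, and to_categorical are imported as the snippet expects:

df_encoded = onehot([0, 2, 1, 2])  # one binary column per distinct class
print(df_encoded)
#      0    1    2
# 0  1.0  0.0  0.0
# 1  0.0  0.0  1.0
# 2  0.0  1.0  0.0
# 3  0.0  0.0  1.0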
def one_hot_encode(array, len_dic):
    return np.array([to_categorical(vector, num_classes=len_dic) for vector in array])
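A quick shape check for one_hot_encode, assuming equal-length id sequences:

import numpy as np
from keras.utils import to_categorical

seqs = [[0, 1, 2], [2, 1, 0]]
encoded = one_hot_encode(seqs, len_dic=3)
print(encoded.shape)  # (2, 3, 3): batch, sequence length, vocabulary size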
def check_model(path=MODEL_PATH, file=SAMPLE_CSV_FILE, nsamples=2):
    '''
    see predictions generated for the training dataset
    '''
    # load model
    model = load_model(path)
    # load data
    data, dic = get_data(file)
    rows, questions, true_answers = encode_data(data, dic)
    # visualize model graph
    # plot_model(model, to_file='tableqa_model.png')
    # predict answers
    prediction = model.predict([rows[:nsamples], questions[:nsamples]])
    print prediction
    predicted_answers = [[np.argmax(character) for character in sample] for sample in prediction]
    print predicted_answers
    print true_answers[:nsamples]
    # one-hot encode answers
    # true_answers = [to_categorical(answer, num_classes=len(dic)) for answer in answers[:nsamples]]
    # decode characters from integer char ids
    inv_dic = {v: k for k, v in dic.iteritems()}
    for i in xrange(nsamples):
        print '\n'
        # print 'Predicted answer: ' + ''.join([dic[char] for char in sample])
        print 'Table: ' + ''.join([inv_dic[char_id] for char_id in rows[i] if char_id != 0])
        print 'Question: ' + ''.join([inv_dic[char_id] for char_id in questions[i] if char_id != 0])
        print 'Answer(correct): ' + ''.join([inv_dic[char_id] for char_id in true_answers[i] if char_id != 0])
        print 'Answer(predicted): ' + ''.join([inv_dic[char_id] for char_id in predicted_answers[i] if char_id != 0])
def get_class_one_hot(self, class_str):
    """Given a class as a string, return its number in the classes
    list. This lets us encode and one-hot it for training."""
    # Encode it first.
    label_encoded = self.classes.index(class_str)
    # Now one-hot it.
    label_hot = to_categorical(label_encoded, len(self.classes))
    assert len(label_hot) == len(self.classes)
    return label_hot
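Note that to_categorical's return shape for a scalar label has varied across Keras versions ((1, n) in some 2.x releases, (n,) in later ones), which affects the assert above; a defensive normalization (my sketch, not from the original repo) is:

import numpy as np
from keras.utils import to_categorical

label_hot = np.squeeze(to_categorical(3, 5))  # (1, 5) or (5,) both become (5,)
assert label_hot.shape == (5,)
print(label_hot)  # [0. 0. 0. 1. 0.]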
def load_training_data(self):
    #training_dataframe = pandas.read_csv(self.commandline_args.train)
    #values = training_dataframe.values[:,1:]
    #labels = training_dataframe.values[:,0]
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    shaped_labels = to_categorical(y_train, self.num_classes+1)
    scaled_values = self.scale_values(X_train)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
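The self.num_classes+1 here is presumably the extra "fake" class used by this GAN's discriminator, so the ten real CIFAR classes one-hot into eleven columns with the last column left at zero; for example (class count assumed to be 10):

import numpy as np
from keras.utils import to_categorical

num_classes = 10
print(to_categorical(np.array([0, 9]), num_classes + 1))
# [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]]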
def load_testing_data(self):
    #testing_dataframe = pandas.read_csv(self.commandline_args.test)
    #values = testing_dataframe.values
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    shaped_labels = to_categorical(y_test, self.num_classes+1)
    scaled_values = self.scale_values(X_test)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
def load_training_data(self):
    #training_dataframe = pandas.read_csv(self.commandline_args.train)
    #values = training_dataframe.values[:,1:]
    #labels = training_dataframe.values[:,0]
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    #shaped_labels = to_categorical(y_train, self.num_classes+1)
    shaped_labels = to_categorical(np.full((y_train.shape[0], 1), 0), self.num_classes+1)
    scaled_values = self.scale_values(X_train)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
def load_testing_data(self):
    #testing_dataframe = pandas.read_csv(self.commandline_args.test)
    #values = testing_dataframe.values
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    #shaped_labels = to_categorical(y_test, self.num_classes+1)
    # Label count must match X_test; the original used y_train.shape[0] here,
    # which would produce 50000 labels for CIFAR's 10000 test images.
    shaped_labels = to_categorical(np.full((y_test.shape[0], 1), 0), self.num_classes+1)
    scaled_values = self.scale_values(X_test)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
def load_training_data(self):
    #training_dataframe = pandas.read_csv(self.commandline_args.train)
    #values = training_dataframe.values[:,1:]
    #labels = training_dataframe.values[:,0]
    (X_train, y_train), (X_test, y_test) = self.mnist_data
    shaped_labels = to_categorical(y_train, self.num_classes+1)
    scaled_values = self.scale_values(X_train)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
def load_testing_data(self):
    #testing_dataframe = pandas.read_csv(self.commandline_args.test)
    #values = testing_dataframe.values
    (X_train, y_train), (X_test, y_test) = self.mnist_data
    shaped_labels = to_categorical(y_test, self.num_classes+1)
    scaled_values = self.scale_values(X_test)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
def save_results(self, filename):
    # save some samples
    fake_categories = np.random.choice(self.num_classes, 16)
    fake_vectors = to_categorical(fake_categories, self.num_classes+1)
    random_value_part = np.random.uniform(0, 1, size=[16, 100-(self.num_classes+1)])
    fake_values = np.concatenate((fake_vectors, random_value_part), axis=1)
    #fake_values = np.random.uniform(0,1,size=[16,100])
    images = self.generator.predict(fake_values)
    plt.figure(figsize=(10, 10))
    for i in range(images.shape[0]):
        plt.subplot(4, 4, i+1)
        image = images[i, :, :, :]
        if self.img_channels == 1:
            image = np.reshape(image, [self.img_rows, self.img_cols])
        elif K.image_data_format() == 'channels_first':
            image = image.transpose(1, 2, 0)
        # no transpose needed when channels are last
        plt.imshow(image, cmap='gray')
        plt.axis('off')
    plt.tight_layout()
    plt.savefig(filename)
    plt.close('all')

#def test_results(self, testing_values, testing_labels):
#    predictions = self.model.predict(testing_values)
#    df = pandas.DataFrame(data=np.argmax(predictions, axis=1), columns=['Label'])
#    df.insert(0, 'ImageId', range(1, 1 + len(df)))
#    # save results
#    df.to_csv(self.commandline_args.output, index=False)
def main(_):
    pp.pprint(flags.FLAGS.__flags)
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # unbuffered stdout (Python 2)
    if not os.path.isdir(FLAGS.checkpoint):
        os.mkdir(FLAGS.checkpoint)
    if not os.path.isdir(FLAGS.log):
        os.mkdir(FLAGS.log)
    model = genChipModel()
    model.summary()
    opt = keras.optimizers.RMSprop(lr=0.001, decay=1e-6)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    filename = '../../data/finalData.txt'
    x, y = readData(filename)
    x_train, y_train, x_test, y_test = init(x, y)
    y_train_labels = to_categorical(y_train, num_classes=79)
    y_test_labels = to_categorical(y_test, num_classes=79)
    model_path = os.path.join(FLAGS.checkpoint, "weights.hdf5")
    callbacks = [
        ModelCheckpoint(filepath=model_path, monitor="val_acc", save_best_only=True, save_weights_only=True),
        TensorBoard(log_dir=FLAGS.log),
        ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=2)
    ]
    hist = model.fit(x_train, y_train_labels, epochs=FLAGS.epoch, batch_size=100,
                     validation_data=(x_test, y_test_labels), callbacks=callbacks)
    loss, accuracy = model.evaluate(x_test, y_test_labels, batch_size=100, verbose=1)
def sample_generator_input(self, batch_size):
    # Generator inputs
    sampled_noise = np.random.normal(0, 1, (batch_size, 62))
    sampled_labels = np.random.randint(0, 10, batch_size).reshape(-1, 1)
    sampled_labels = to_categorical(sampled_labels, num_classes=self.num_classes)
    sampled_cont = np.random.uniform(-1, 1, size=(batch_size, 2))
    return sampled_noise, sampled_labels, sampled_cont
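A shape sketch of the generator input these three parts are concatenated into (dimensions as in the snippet; num_classes assumed to be 10):

import numpy as np
from keras.utils import to_categorical

batch_size, num_classes = 4, 10
noise = np.random.normal(0, 1, (batch_size, 62))
labels = to_categorical(np.random.randint(0, 10, batch_size).reshape(-1, 1), num_classes=num_classes)
cont = np.random.uniform(-1, 1, size=(batch_size, 2))
print(np.concatenate((noise, labels, cont), axis=1).shape)  # (4, 74) = 62 + 10 + 2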
def get_train_val(X, y, idx, options):
    # Leave sample `idx` out; train on the rest.
    X_train = X[:idx] + X[idx+1:]
    y_train = y[:idx] + y[idx+1:]
    X_train = pad_sequences(X_train, maxlen=options['MAX_LEN'], padding='post', value=0, truncating='post')
    y_train = pad_sequences(y_train, maxlen=options['MAX_LEN'], padding='post', value=0, truncating='post')
    y_train_categorical = []
    for i in xrange(y_train.shape[0]):  # loop variable renamed from `idx` to avoid shadowing the argument
        y_train_categorical.append(to_categorical(y_train[i], len(options['CLASSES_2_IX'])))
    return X_train, np.array(y_train_categorical)
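get_train_val stacks one (MAX_LEN, n_classes) one-hot matrix per remaining training sequence; a toy shape check (option values invented):

import numpy as np
from keras.utils import to_categorical

MAX_LEN, n_classes = 6, 3
y_train = np.zeros((2, MAX_LEN), dtype=int)  # two padded tag sequences
y_cat = np.array([to_categorical(row, n_classes) for row in y_train])
print(y_cat.shape)  # (2, 6, 3): samples, timesteps, classes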
def data_gen(filename, dic_len, batch_size):
    with h5py.File(filename, 'r') as f:
        datas, labels = f['x'][:], f['y'][:]
        nb_samples = datas.shape[0]
        index_max = nb_samples - batch_size
        while 1:
            start = int(np.random.randint(index_max, size=1))
            data = datas[start:start+batch_size]
            label = to_categorical(labels[start:start+batch_size], dic_len)
            label = label.astype('int8').reshape((batch_size, -1, label.shape[-1]))
            yield (data, label)
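The generator yields one random contiguous batch per step, so it can be handed to Keras-2-era training directly; hypothetical wiring (the model, HDF5 path, and step counts are placeholders, not from the source):

gen = data_gen('train.h5', dic_len=50, batch_size=32)
model.fit_generator(gen, steps_per_epoch=100, epochs=5)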
def get_labels(self, tag_sets):
    """Create labels from a list of tag sets

    Args:
        tag_sets (list(list(str))): A list of word tag sets

    Returns:
        (numpy.ndarray): Padded one-hot label sequences, one per tag set
    """
    labels = []
    print('Getting labels...')
    for tag_set in tag_sets:
        indexed_tags = self.index_tags(tag_set)
        labels.append(to_categorical(np.asarray(indexed_tags), num_classes=4))
    labels = pad_sequences(labels, maxlen=200)
    return labels
def get_labels(self, categories):
    """Create labels from a list of categories

    Args:
        categories (list(str)): A list of product categories

    Returns:
        (numpy.ndarray): One-hot label rows, one per category
    """
    indexed_categories = self.index_categories(categories)
    labels = to_categorical(np.asarray(indexed_categories))
    return labels
def format(self, word_seq, vocab_size, sequence_size):
    words = []
    nexts = []
    sequence_count = (len(word_seq) - 1) // sequence_size
    for i in range(sequence_count):
        start = i * sequence_size
        words.append(word_seq[start:start + sequence_size])
        next_seq = word_seq[(start + 1):(start + 1 + sequence_size)]
        next_seq_as_one_hot = to_categorical(next_seq, vocab_size)  # to one-hot vectors
        nexts.append(next_seq_as_one_hot)
    words = np.array(words)
    nexts = np.array(nexts)
    return words, nexts
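format pairs each window of ids with the one-hot encoding of the same window shifted by one, i.e. next-word targets for a language model. Since self is unused, an unbound call works for a quick shape check if the method is pulled out at module level (sequence invented):

# assumes numpy and to_categorical are in scope, as the snippet expects
word_seq = [0, 1, 2, 3, 4, 0, 1, 2, 3]  # 9 ids -> (9-1)//4 = 2 windows of length 4
words, nexts = format(None, word_seq, vocab_size=5, sequence_size=4)
print(words.shape)  # (2, 4)
print(nexts.shape)  # (2, 4, 5)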
def test_model_pipe_mnist_urls(self):
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(60000, 28, 28, 1)
    x_test = x_test.reshape(10000, 28, 28, 1)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28, 1)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    no_epochs = 3
    batch_size = 32
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.fit(
        x_train, y_train,
        validation_data=(x_test, y_test),
        epochs=no_epochs,
        batch_size=batch_size)
    pipe = model_util.ModelPipe()
    pipe.add(
        lambda url: six.moves.urllib.request.urlopen(url).read(),
        num_workers=2,
        timeout=10)
    pipe.add(lambda img: Image.open(BytesIO(img)))
    pipe.add(model_util.resize_to_model_input(model))
    pipe.add(lambda x: 1 - x)
    pipe.add(model, num_workers=1, batch_size=32, batcher=np.vstack)
    pipe.add(lambda x: np.argmax(x, axis=1))
    url5 = 'http://blog.otoro.net/assets/20160401/png/mnist_output_10.png'
    url2 = 'http://joshmontague.com/images/mnist-2.png'
    urlb = 'http://joshmontague.com/images/mnist-3.png'
    expected_output = {url5: 5, url2: 2}
    output = pipe({url5: url5, url2: url2, urlb: urlb})
    output = {k: v for k, v in six.iteritems(output) if v}
    self.assertEqual(output, expected_output)
def train(self, epochs, batch_size=128, save_interval=50):
    # Load the dataset
    (X_train, y_train), (_, _) = mnist.load_data()
    # Rescale -1 to 1
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = np.expand_dims(X_train, axis=3)
    y_train = y_train.reshape(-1, 1)
    half_batch = int(batch_size / 2)
    for epoch in range(epochs):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        # Train discriminator on generator output
        sampled_noise, sampled_labels, sampled_cont = self.sample_generator_input(half_batch)
        gen_input = np.concatenate((sampled_noise, sampled_labels, sampled_cont), axis=1)
        # Generate a half batch of new images
        gen_imgs = self.generator.predict(gen_input)
        fake = np.zeros((half_batch, 1))
        d_loss_fake = self.discriminator.train_on_batch(gen_imgs, [fake, sampled_labels, sampled_cont])
        # Train discriminator on real data
        # Select a random half batch of images
        idx = np.random.randint(0, X_train.shape[0], half_batch)
        imgs = X_train[idx]
        labels = to_categorical(y_train[idx], num_classes=self.num_classes)
        valid = np.ones((half_batch, 1))
        d_loss_real = self.discriminator.train_on_batch(imgs, [valid, labels, sampled_cont])
        # Avg. loss
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        # ---------------------
        #  Train Generator
        # ---------------------
        valid = np.ones((batch_size, 1))
        sampled_noise, sampled_labels, sampled_cont = self.sample_generator_input(batch_size)
        gen_input = np.concatenate((sampled_noise, sampled_labels, sampled_cont), axis=1)
        # Train the generator
        g_loss = self.combined.train_on_batch(gen_input, [valid, sampled_labels, sampled_cont])
        # Plot the progress
        print("%d [D loss: %.2f, acc.: %.2f%%, label_acc: %.2f%%] [G loss: %.2f]" % (epoch, d_loss[0], 100*d_loss[4], 100*d_loss[5], g_loss[0]))
        # If at save interval => save generated image samples
        if epoch % save_interval == 0:
            self.save_imgs(epoch)
def __init__(self, gpuid, queue, results):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    from keras.datasets import mnist
    from keras.utils import to_categorical
    Process.__init__(self, name='ModelProcessor')
    self._gpuid = gpuid
    self._queue = queue
    self._results = results
    # Load data on the worker
    batch_size = 128
    num_classes = 10
    epochs = 1
    # input image dimensions
    img_rows, img_cols = 28, 28
    # the data, shuffled and split between train and test sets
    (x_train, y_train), (self.x_test, self.y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], -1)
    self.x_test = self.x_test.reshape(self.x_test.shape[0], -1)
    x_train = x_train.astype('float32')
    self.x_test = self.x_test.astype('float32')
    x_train /= 255
    self.x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(self.x_test.shape[0], 'test samples')
    idxs = np.arange(x_train.shape[0])
    np.random.shuffle(idxs)
    num_examples = 12000
    self.x_train = x_train[idxs][:num_examples]
    self.y_train = y_train[idxs][:num_examples]
    # convert class vectors to binary class matrices
    self.y_train = to_categorical(self.y_train, num_classes)
    self.y_test = to_categorical(self.y_test, num_classes)
def train_classifier(X_train, y_train,
                     X_test, y_test, model_path, output_path, epochs):
    image_shape = X_train[0].shape
    model = create_base_network(X_train.shape[1:])
    model.load_weights(model_path)
    model = attach_classifier(model, 2)
    opt = opts.RMSprop(epsilon=1e-4, decay=1e-6)
    model.compile(
        loss='categorical_crossentropy',
        metrics=[acc],
        optimizer=opt
    )
    callbacks_list = []
    if output_path is not None:
        if os.path.exists(output_path):
            shutil.rmtree(output_path)
        os.makedirs(output_path)
        file_fmt = '{epoch:02d}-{loss:.4f}-{val_loss:.4f}.hdf5'
        checkpoint = SeparateSaveCallback(
            output_path, file_fmt, siamese=False)
        callbacks_list = [checkpoint]
    y_train = to_categorical(y_train, 2)
    if X_test is not None and y_test is not None:
        y_test = to_categorical(y_test, 2)
        history = model.fit(
            x=X_train,
            y=y_train,
            callbacks=callbacks_list,
            batch_size=batch_size,  # batch_size is presumably a module-level setting in the source repo
            validation_data=(X_test, y_test),
            epochs=epochs
        )
    else:
        history = model.fit(
            x=X_train,
            y=y_train,
            callbacks=callbacks_list,
            batch_size=batch_size,
            validation_split=0.5,
            epochs=epochs
        )
    return history