def fit(self, X, y=None):
"""
This should fit classifier. All the "work" should be done here.
Note: assert is not a good choice here and you should rather
use try/except blog with exceptions. This is just for short syntax.
"""
# Generate batches
batches = batch_iter(
list(zip(X, y)), self.FLAGS.batch_size, self.FLAGS.num_epochs)
# Training loop. For each batch...
for batch in batches:
x_batch, y_batch = zip(*batch)
feed_dict = {
self.cnn.input_x: x_batch,
self.cnn.input_y: y_batch,
self.cnn.dropout_keep_prob: self.FLAGS.dropout_keep_prob
}
_, loss, accuracy = self.sess.run(
[self.optimizer, self.cnn.loss, self.cnn.accuracy],
feed_dict)
# print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
Python batch_iter() usage examples
def batch_dev_step(x_batch, y_batch, writer=None):
"""
Evaluates model on a dev set
"""
batches = data_helpers.batch_iter(
    list(zip(x_batch, y_batch)), FLAGS.batch_size, 1)
for batch in batches:
x_batch, y_batch = zip(*batch)
feed_dict = {
cnn.input_x: x_batch,
cnn.input_y: y_batch,
cnn.dropout_keep_prob: 1.0
}
step, summaries, loss, accuracy = sess.run(
[global_step, dev_summary_op, cnn.loss, cnn.accuracy],
feed_dict)
time_str = datetime.datetime.now().isoformat()
print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
if writer:
writer.add_summary(summaries, step)
# Generate batches
train.py (project: question-classification-cnn-rnn-attention, author: sefira)
def dev_step(x_dev, y_dev):
"""
Evaluates model on a dev set
"""
batches = data_helpers.batch_iter(
list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
loss_sum = 0
accuracy_sum = 0
count = 0
for batch in batches:
x_batch, y_batch = zip(*batch)
feed_dict = {
rnn.input_x: x_batch,
rnn.input_y: y_batch,
rnn.dropout_keep_prob: 1.0,
rnn.batch_size: len(x_batch),
rnn.real_len: real_len(x_batch)
}
step, summaries, loss, accuracy = sess.run(
[global_step, dev_summary_op, rnn.loss, rnn.accuracy],
feed_dict)
loss_sum = loss_sum + loss
accuracy_sum = accuracy_sum + accuracy
count = count + 1
loss = loss_sum / count
accuracy = accuracy_sum / count
time_str = datetime.datetime.now().isoformat()
logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
dev_summary_writer.add_summary(summaries, step)
# Generate batches
def predict(self, x_raw):
x_raw = list(x_raw)
x_raw = [s.strip() for s in x_raw]
x_raw = [list(s) for s in x_raw]
x_pad, _ = data_helpers.pad_sentences(x_raw, sequence_length)
x_test = np.array([[vocabulary.get(word, 0) for word in sentence] for sentence in x_pad])
# Get the placeholders from the graph by name
input_x = self.graph.get_operation_by_name("input_x").outputs[0]
# input_y = graph.get_operation_by_name("input_y").outputs[0]
dropout_keep_prob = self.graph.get_operation_by_name("dropout_keep_prob").outputs[0]
# Tensors we want to evaluate
predictions = self.graph.get_operation_by_name("output/predictions").outputs[0]
# Generate batches for one epoch
batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)
# Collect the predictions here
all_predictions = []
for x_test_batch in batches:
batch_predictions = self.sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
all_predictions = np.concatenate([all_predictions, batch_predictions])
return all_predictions
#test predict
#========================================
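A hypothetical call to the predict() method above. The clf object and the example sentences are assumptions for illustration; predict() only requires that self.sess, self.graph and the vocabulary/sequence_length globals are already set up by the surrounding class.

# Hypothetical usage -- "clf" stands for whatever object defines the predict()
# method above, with its session, graph and vocabulary already loaded.
raw_sentences = ["this is a test sentence", "another raw sentence"]
predictions = clf.predict(raw_sentences)
print(predictions)  # one predicted class index per input sentence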
def get_batches():
print("Loading train data...")
lexical_features = lexical_level_features(df)
batch_iterator = data_helpers.batch_iter(lexical_features, FLAGS.batch_size, FLAGS.num_epochs)
return batch_iterator
def get_batches_test():
print("Loading test data...")
df = data_helpers.read_data("/home/sahil/ML-bucket/test.csv")
lexical_features = lexical_level_features(df)
batch_iterator = data_helpers.batch_iter(lexical_features, FLAGS.batch_size, 1, shuffle=False)
return batch_iterator
def fun():
r = hack()
s = data_helpers.batch_iter(r, 64, 1)
return s
def linear_NN(X, y):
graph = tf.Graph()
with graph.as_default():
nn = linear_nn.nn_linear(X, y)
global_step = tf.Variable(0, name="global_step", trainable=False)
optimizer = tf.train.MomentumOptimizer(
learning_rate=0.001,
momentum=0.9,
use_nesterov=True,
).minimize(nn.loss, global_step=global_step)
with tf.Session(graph=graph) as session:
train_loss_history = []
session.run(tf.global_variables_initializer())
batches = data_helpers.batch_iter(list(zip(X, y)), batch_size=64, num_epochs=num_epochs, shuffle=True)
for batch in batches:
X_train, y_train = zip(*batch)
feed_dict = {nn.input_x: np.asarray(X_train), nn.input_y: np.asarray(y_train)}
_, step, loss, predictions = session.run([optimizer, global_step, nn.loss, nn.predictions], feed_dict)
time_str = datetime.datetime.now().isoformat()
print("{}: step {}, loss {:g}".format(time_str, step, loss))
train_loss_history.append(loss)
# if step % 10 == 0:
# pass
x_axis = np.arange(step)
plt.plot(x_axis, train_loss_history, "b-", linewidth=2, label="train")
plt.grid()
plt.legend()
plt.ylabel("loss")
plt.show()
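A hypothetical smoke test for linear_NN() above. The random data, shapes, and num_epochs value are illustrative assumptions; note that linear_NN() reads num_epochs from the enclosing scope rather than taking it as an argument.

import numpy as np

# Purely illustrative inputs -- any (X, y) pair accepted by linear_nn.nn_linear() works.
X = np.random.randn(512, 10).astype(np.float32)
y = np.random.randn(512, 1).astype(np.float32)
num_epochs = 5   # picked up by linear_NN() from the enclosing scope
linear_NN(X, y)  # trains, prints the per-step loss, and plots the loss curve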
def dev_step(x_dev, y_dev, writer=None):
'''
Evaluates the model on the full dev set.
----------------------------------------
Since the full dev set likely won't fit into memory, this function
splits it into minibatches and reports the average loss and accuracy
to the command line and to the summary writer.
'''
dev_stats = StatisticsCollector()
dev_batches = data_helpers.batch_iter(list(zip(x_dev, y_dev)),
FLAGS.batch_size, 1)
for dev_batch in dev_batches:
if len(dev_batch) > 0:
x_dev_batch, y_dev_batch = zip(*dev_batch)
feed_dict = {
cnn.input_song1: tuple(spect_dict[i[0]] for i in x_dev_batch),
cnn.input_song2: tuple(spect_dict[i[1]] for i in x_dev_batch),
cnn.input_y: y_dev_batch,
cnn.dropout_keep_prob: 1.0
}
step, loss, accuracy = sess.run(
[global_step, cnn.loss, cnn.accuracy],
feed_dict)
dev_stats.collect(accuracy, loss)
time_str = datetime.datetime.now().isoformat()
batch_accuracy, batch_loss, summaries = dev_stats.report()
print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, batch_loss, batch_accuracy))
if writer:
writer.add_summary(summaries, step)
# Generate training batches
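StatisticsCollector is not shown on this page; its interface (collect() once per batch, report() returning the averaged accuracy, loss, and a summary) is inferred from the usage above. A minimal sketch under those assumptions, using the TF 1.x Summary protobuf; the real project's class may differ.

import tensorflow as tf

class StatisticsCollector(object):
    """Accumulates per-batch accuracy/loss and reports their means (sketch)."""

    def __init__(self):
        self._accuracies = []
        self._losses = []

    def collect(self, accuracy, loss):
        self._accuracies.append(float(accuracy))
        self._losses.append(float(loss))

    def report(self):
        mean_acc = sum(self._accuracies) / len(self._accuracies)
        mean_loss = sum(self._losses) / len(self._losses)
        # Build a summary protobuf so the caller can pass it to a FileWriter
        summaries = tf.Summary(value=[
            tf.Summary.Value(tag="accuracy", simple_value=mean_acc),
            tf.Summary.Value(tag="loss", simple_value=mean_loss),
        ])
        return mean_acc, mean_loss, summaries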
def batch_dev_step(x_batch, y_batch, writer=None):
"""
Evaluates model on a dev set
"""
batches = data_helpers.batch_iter(
    list(zip(x_batch, y_batch)), FLAGS.batch_size, 1)
t_acc = 0.0
t_loss = 0.0
t = 0
f_r = open(file_name, "a+")
step1 = 0
for batch in batches:
x_batch, y_batch = zip(*batch)
feed_dict = {
cnn.input_x: x_batch,
cnn.input_y: y_batch,
cnn.dropout_keep_prob: 1.0
}
step, summaries, loss, accuracy = sess.run(
[global_step, dev_summary_op, cnn.loss, cnn.accuracy],
feed_dict)
time_str = datetime.datetime.now().isoformat()
print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
t_acc += accuracy
t_loss += loss
t += 1
step1 = step
if writer:
writer.add_summary(summaries, step)
f_r.write("{} step accuracy {:g} loss {:g}\n".format(step1, t_acc / t, t_loss / t))
f_r.close()
print("total acc {:g}, loss {:g}".format(t_acc / t, t_loss / t))
# Generate batches
def dev_step(x_dev, pos_dev, neg_dev):
"""
Evaluates model on a dev set
"""
batches = data_helpers.batch_iter(
list(zip(x_dev, pos_dev, neg_dev)), FLAGS.batch_size, 1)
loss_sum = 0
accuracy_sum = 0
count = 0
for batch in batches:
x_batch, pos_batch, neg_batch = zip(*batch)
feed_dict = {
rnn.input_x: x_batch,
rnn.input_xpos: pos_batch,
rnn.input_xneg: neg_batch,
rnn.real_len_x: real_len(x_batch),
rnn.real_len_xpos: real_len(pos_batch),
rnn.real_len_xneg: real_len(neg_batch),
rnn.dropout_keep_prob: 1.0,
rnn.batch_size: len(x_batch),
}
step, summaries, loss, accuracy = sess.run(
[global_step, dev_summary_op, rnn.loss, rnn.accuracy],
feed_dict)
loss_sum = loss_sum + loss
accuracy_sum = accuracy_sum + accuracy
count = count + 1
loss = loss_sum / count
accuracy = accuracy_sum / count
time_str = datetime.datetime.now().isoformat()
logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
dev_summary_writer.add_summary(summaries, step)
# Generate batches
def getSentimentCNN(fileToLoad, modelDir):
checkpoint_dir = "./rnn_runs/"+modelDir+"/checkpoints/"
batch_size = 64
x_test, y_test, vocabulary, vocabulary_inv,trainS = data_helpers.load_data_for_books("./data/"+fileToLoad+".txt")
y_test = np.argmax(y_test, axis=1)
print("Vocabulary size: {:d}".format(len(vocabulary)))
print("Test set size {:d}".format(len(y_test)))
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
session_conf = tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=False)
sess = tf.Session(config=session_conf)
with sess.as_default():
# Load the saved meta graph and restore variables
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
# Get the placeholders from the graph by name
input_x = graph.get_operation_by_name("input_x").outputs[0]
# input_y = graph.get_operation_by_name("input_y").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
# Tensors we want to evaluate
predictions = graph.get_operation_by_name("output/predictions").outputs[0]
scores = graph.get_operation_by_name("output/scores").outputs[0]
# Generate batches for one epoch
batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)
# Collect the predictions here
all_predictions = []
all_scores = []
for x_test_batch in batches:
batch_scores = sess.run(scores, {input_x: x_test_batch, dropout_keep_prob: 1.0})
batch_predictions = np.argmax(batch_scores,axis=1)
#batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
all_predictions = np.concatenate([all_predictions, batch_predictions])
all_scores = np.concatenate([all_scores,batch_scores[:,1] - batch_scores[:,0]])
mbs = float(len(all_predictions[all_predictions == 1]))/len(all_predictions)
mss = np.mean(all_scores)
print "Mean Binary Sentiment",mbs
print "Mean Smooth Sentiment",mss
return all_predictions,all_scores
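A hypothetical call to getSentimentCNN(). Both arguments are placeholders: the first names a preprocessed text file under ./data/, the second a trained run directory under ./rnn_runs/ containing checkpoints.

# "mobydick" and "cnn_run_1" are placeholder names for illustration only.
predictions, scores = getSentimentCNN("mobydick", "cnn_run_1")
# predictions: 0/1 per sentence, scores: smooth (positive - negative) score per sentence
print("fraction positive: {:.3f}".format(float((predictions == 1).sum()) / len(predictions)))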
def getSentimentRNN(fileToLoad,modelDir):
checkpoint_dir = "./rnn_runs/"+modelDir+"/checkpoints/"
batch_size = 64
n_hidden = 256
x_test, y_test, vocabulary, vocabulary_inv,trainS = data_helpers.load_data_for_books("./data/"+fileToLoad+".txt")
y_test = np.argmax(y_test, axis=1)
print("Vocabulary size: {:d}".format(len(vocabulary)))
print("Test set size {:d}".format(len(y_test)))
x_test = np.fliplr(x_test)
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
session_conf = tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=False)
sess = tf.Session(config=session_conf)
with sess.as_default():
# Load the saved meta graph and restore variables
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
print("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
# Get the placeholders from the graph by name
input_x = graph.get_operation_by_name("x_input").outputs[0]
predictions = graph.get_operation_by_name("prediction").outputs[0]
istate = graph.get_operation_by_name('initial_state').outputs[0]
keep_prob = graph.get_operation_by_name('keep_prob').outputs[0]
# Generate batches for one epoch
batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)
# Collect the predictions here
all_predictions = []
all_scores = []
for x_test_batch in batches:
batch_predictions = sess.run(predictions, {input_x: x_test_batch, istate: np.zeros((len(x_test_batch), 2*n_hidden)), keep_prob: 1.0})
binaryPred = np.argmax(batch_predictions,axis=1)
all_predictions = np.concatenate([all_predictions, binaryPred])
all_scores = np.concatenate([all_scores, batch_predictions[:,1] - batch_predictions[:,0]])
mbs = float(len(all_predictions[all_predictions == 1]))/len(all_predictions)
mss = np.mean(all_scores)
print "Mean Binary Sentiment",mbs
print "Mean Smooth Sentiment",mss
return all_predictions,all_scores
def text_cnn_load_model_and_eval(x_test,
checkpoint_file,
allow_soft_placement,
log_device_placement,
embeddings):
graph = tf.Graph()
with graph.as_default():
session_conf = tf.ConfigProto(
allow_soft_placement=allow_soft_placement,
log_device_placement=log_device_placement)
sess = tf.Session(config=session_conf)
with sess.as_default():
# Load the saved meta graph and restore variables
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
# Get the placeholders from the graph by name
input_x = graph.get_operation_by_name("input_x").outputs[0]
# input_y = graph.get_operation_by_name("input_y").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
# Tensors we want to evaluate
predictions = graph.get_operation_by_name("output/predictions").outputs[0]
# Generate batches for one epoch
batch_size = 50
batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)
# Collect the predictions here
all_predictions = []
# Load embeddings placeholder
embedding_size = embeddings.shape[1]
embeddings_number = embeddings.shape[0]
print('embedding_size:%s, embeddings_number:%s' % (embedding_size, embeddings_number))
# with tf.name_scope("embedding"):
# embeddings_placeholder = tf.placeholder(tf.float32, shape=[embeddings_number, embedding_size])
embeddings_placeholder = graph.get_operation_by_name("embedding/Placeholder").outputs[0]
for x_test_batch in batches:
batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0,
embeddings_placeholder: embeddings})
all_predictions = np.concatenate([all_predictions, batch_predictions])
return all_predictions
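A hypothetical call to text_cnn_load_model_and_eval(). The checkpoint path, the index matrix, and the embedding shapes are illustrative assumptions; in practice the checkpoint must exist and the shapes must match the trained graph.

import numpy as np

x_test = np.random.randint(0, 5000, size=(200, 60))        # word-index matrix (batch, seq_len)
embeddings = np.random.rand(5000, 300).astype(np.float32)  # (vocab_size, embedding_dim)
all_predictions = text_cnn_load_model_and_eval(
    x_test=x_test,
    checkpoint_file="./runs/checkpoints/model-1000",        # placeholder path
    allow_soft_placement=True,
    log_device_placement=False,
    embeddings=embeddings)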
def text_cnn_load_model_and_eval_v2(x_test_s1,
x_test_s2,
checkpoint_file,
allow_soft_placement,
log_device_placement,
embeddings):
graph = tf.Graph()
with graph.as_default():
session_conf = tf.ConfigProto(
allow_soft_placement=allow_soft_placement,
log_device_placement=log_device_placement)
sess = tf.Session(config=session_conf)
with sess.as_default():
# Load the saved meta graph and restore variables
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
# Get the placeholders from the graph by name
input_x_s1 = graph.get_operation_by_name("input_x_s1").outputs[0]
input_x_s2 = graph.get_operation_by_name("input_x_s2").outputs[0]
# input_y = graph.get_operation_by_name("input_y").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
# Tensors we want to evaluate
predictions = graph.get_operation_by_name("output/predictions").outputs[0]
# Generate batches for one epoch
batch_size = 50
batches = data_helpers.batch_iter(list(zip(x_test_s1, x_test_s2)), batch_size, 1, shuffle=False)
# Collect the predictions here
all_predictions = []
# Load embeddings placeholder
embedding_size = embeddings.shape[1]
embeddings_number = embeddings.shape[0]
print('embedding_size:%s, embeddings_number:%s' % (embedding_size, embeddings_number))
# with tf.name_scope("embedding"):
# embeddings_placeholder = tf.placeholder(tf.float32, shape=[embeddings_number, embedding_size])
embeddings_placeholder = graph.get_operation_by_name("embedding/Placeholder").outputs[0]
for batch in batches:
x_test_batch_s1, x_test_batch_s2 = zip(*batch)
batch_predictions = sess.run(predictions, {input_x_s1: x_test_batch_s1,
input_x_s2: x_test_batch_s2,
dropout_keep_prob: 1.0,
embeddings_placeholder: embeddings})
all_predictions = np.concatenate([all_predictions, batch_predictions])
return all_predictions