def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
with tf.name_scope("loss_mix2"):
float_labels = tf.cast(labels, tf.float32)
float_encoders = float_labels
for i in range(FLAGS.encoder_layers):
var_i = np.loadtxt(FLAGS.autoencoder_dir+'autoencoder_layer%d.model' % i)
weight_i = tf.constant(var_i[:-1,:],dtype=tf.float32)
bias_i = tf.reshape(tf.constant(var_i[-1,:],dtype=tf.float32),[-1])
float_encoders = tf.nn.xw_plus_b(float_encoders,weight_i,bias_i)
if i<FLAGS.encoder_layers-1:
float_encoders = tf.nn.relu(float_encoders)
else:
hidden_mean = tf.reduce_mean(float_encoders,axis=1,keep_dims=True)
hidden_std = tf.sqrt(tf.reduce_mean(tf.square(float_encoders-hidden_mean),axis=1,keep_dims=True))
float_encoders = (float_encoders-hidden_mean)/(hidden_std+1e-6)
#float_encoders = tf.nn.sigmoid(float_encoders)
cross_entropy_encoder = 0.1*self.calculate_mseloss(predictions_encoder,float_encoders)
cross_entropy_loss = self.calculate_loss(predictions,labels)
return cross_entropy_encoder+cross_entropy_loss, float_encoders
#return cross_entropy_encoder, float_encoders
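# Note (added for clarity, not from the original project): the loss above loads each
# autoencoder layer from a plain-text matrix 'autoencoder_layer%d.model' in which the
# bias is stored as the last row, hence the var_i[:-1,:] / var_i[-1,:] split.
# A minimal hedged sketch of that assumed file layout, with made-up dimensions:
import numpy as np

def save_encoder_layer(path, weight, bias):
    # Stack the bias as the final row so a single np.loadtxt recovers both.
    np.savetxt(path, np.vstack([weight, bias[np.newaxis, :]]))

def load_encoder_layer(path):
    var = np.loadtxt(path)
    return var[:-1, :], var[-1, :]   # weight (in_dim, out_dim), bias (out_dim,)

# Example with hypothetical sizes:
# save_encoder_layer('autoencoder_layer0.model', np.random.randn(4096, 1024), np.zeros(1024))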
Example source code for Python tf.name_scope()
def inference(self):
"""main computation graph here: 1. embeddding layer, 2.Bi-LSTM layer, 3.concat, 4.FC layer 5.softmax """
#1.get emebedding of words in the sentence
self.embedded_words = tf.nn.embedding_lookup(self.Embedding,self.input_x) #shape:[None,sentence_length,embed_size]
#2. Bi-lstm layer
# define lstm cell: get lstm cell output
lstm_fw_cell=rnn.BasicLSTMCell(self.hidden_size) #forward direction cell
lstm_bw_cell=rnn.BasicLSTMCell(self.hidden_size) #backward direction cell
if self.dropout_keep_prob is not None:
lstm_fw_cell=rnn.DropoutWrapper(lstm_fw_cell,output_keep_prob=self.dropout_keep_prob)
lstm_bw_cell=rnn.DropoutWrapper(lstm_bw_cell,output_keep_prob=self.dropout_keep_prob)
# bidirectional_dynamic_rnn: input: [batch_size, max_time, input_size]
# output: A tuple (outputs, output_states)
# where:outputs: A tuple (output_fw, output_bw) containing the forward and the backward rnn output `Tensor`.
outputs,_=tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell,self.embedded_words,dtype=tf.float32) #[batch_size,sequence_length,hidden_size] #creates a dynamic bidirectional recurrent neural network
print("outputs:===>",outputs) #outputs:(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose:0' shape=(?, 5, 100) dtype=float32>, <tf.Tensor 'ReverseV2:0' shape=(?, 5, 100) dtype=float32>))
#3. concat output
output_rnn=tf.concat(outputs,axis=2) #[batch_size,sequence_length,hidden_size*2]
self.output_rnn_last=tf.reduce_mean(output_rnn,axis=1) #[batch_size,hidden_size*2] #output_rnn_last=output_rnn[:,-1,:] ##[batch_size,hidden_size*2] #TODO
print("output_rnn_last:", self.output_rnn_last) # <tf.Tensor 'strided_slice:0' shape=(?, 200) dtype=float32>
#4. logits(use linear layer)
with tf.name_scope("output"): #inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
logits = tf.matmul(self.output_rnn_last, self.W_projection) + self.b_projection # [batch_size,num_classes]
return logits
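# Hedged sketch (an assumption, not the original code): W_projection and b_projection are
# referenced above but never defined in this snippet. With hidden_size*2 inputs and
# num_classes outputs they could be created as follows; the sizes are illustrative only.
import tensorflow as tf

hidden_size, num_classes = 100, 10
W_projection = tf.Variable(tf.truncated_normal([hidden_size * 2, num_classes], stddev=0.1), name="W_projection")
b_projection = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b_projection")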
def value_transition(self, curr_state, next_symbols, batch_size):
first_value_token = self.num_functions + self.num_begin_tokens + self.num_control_tokens
num_value_tokens = self.output_size - first_value_token
with tf.name_scope('grammar_transition'):
adjusted_next_symbols = tf.where(next_symbols >= self.num_control_tokens, next_symbols + (first_value_token - self.num_control_tokens), next_symbols)
assert1 = tf.Assert(tf.reduce_all(tf.logical_and(next_symbols < num_value_tokens, next_symbols >= 0)), [curr_state, next_symbols])
with tf.control_dependencies([assert1]):
transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
assert transitions.get_shape()[1:] == (self.output_size,)
indices = tf.stack((tf.range(0, batch_size), adjusted_next_symbols), axis=1)
next_state = tf.gather_nd(transitions, indices)
assert2 = tf.Assert(tf.reduce_all(next_state >= 0), [curr_state, adjusted_next_symbols, next_state])
with tf.control_dependencies([assert2]):
return tf.identity(next_state)
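# Added illustration (NumPy, not from the original project): the tf.gather / tf.gather_nd
# pair above looks up, for each batch element i, the row transitions[i] for its current
# state and then the entry at its (adjusted) next symbol, i.e. the next grammar state.
import numpy as np

transition_matrix = np.array([[1, 2, -1],   # toy automaton: 3 states x 3 symbols,
                              [2, -1, 0],   # -1 marks a forbidden transition
                              [-1, 0, 1]])
curr_state = np.array([0, 2])               # per-example current states
next_symbols = np.array([1, 2])             # per-example emitted symbols
transitions = transition_matrix[curr_state]             # analogous to tf.gather
next_state = transitions[np.arange(2), next_symbols]    # analogous to tf.gather_nd
print(next_state)                                       # -> [2 1]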
Source: beam_aligner.py (project: almond-nnparser, author: Stanford-Mobisocial-IoT-Lab)
def finalize(self, outputs : BeamSearchOptimizationDecoderOutput, final_state : BeamSearchOptimizationDecoderState, sequence_lengths):
# all output fields are [max_time, batch_size, ...]
predicted_ids = tf.contrib.seq2seq.gather_tree(
outputs.predicted_ids, outputs.parent_ids,
sequence_length=sequence_lengths, name='predicted_ids')
total_loss = tf.reduce_sum(outputs.loss, axis=0, name='violation_loss')
predicted_time = tf.shape(predicted_ids)[0]
last_score = predicted_time-1
with tf.name_scope('gold_score'):
gold_score = outputs.gold_score[last_score]
with tf.name_scope('sequence_scores'):
sequence_scores = outputs.scores[last_score]
return FinalBeamSearchOptimizationDecoderOutput(beam_search_decoder_output=outputs,
predicted_ids=predicted_ids,
scores=sequence_scores,
gold_score=gold_score,
gold_beam_id=final_state.gold_beam_id,
num_available_beams=final_state.num_available_beams,
total_violation_loss=total_loss), final_state
Source: grammar_decoder.py (project: almond-nnparser, author: Stanford-Mobisocial-IoT-Lab)
def step(self, time, inputs, state, name=None):
with tf.name_scope(name, "GrammarDecodingStep", (time, inputs, state)):
decoder_state, grammar_state = state
cell_outputs, cell_state = self._cell(inputs, decoder_state)
if self._output_layer is not None:
cell_outputs = self._output_layer(cell_outputs)
grammar_cell_outputs = self._grammar_helper.constrain_logits(cell_outputs, grammar_state)
cell_outputs = grammar_cell_outputs
sample_ids = self._helper.sample(time=time, outputs=grammar_cell_outputs, state=cell_state)
(finished, next_inputs, next_decoder_state) = self._helper.next_inputs(
time=time,
outputs=cell_outputs,
state=cell_state,
sample_ids=sample_ids)
if self._fixed_outputs is not None:
next_grammar_state = self._grammar_helper.transition(grammar_state, self._fixed_outputs.read(time), self.batch_size)
else:
next_grammar_state = self._grammar_helper.transition(grammar_state, sample_ids, self.batch_size)
next_state = (next_decoder_state, next_grammar_state)
outputs = BasicDecoderOutput(cell_outputs, sample_ids)
return (outputs, next_state, next_inputs, finished)
def create_optimizer():
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
beta1=FLAGS.beta1,
beta2=FLAGS.beta2,
epsilon=FLAGS.epsilon)
return optimizer
# Towers
# ======
# In order to properly make use of multiple GPUs, one must introduce new abstractions,
# not present when using a single GPU, that facilitate the multi-GPU use case.
# In particular, one must introduce a means to isolate the inference and gradient
# calculations on the various GPUs.
# The abstraction we introduce for this purpose is called a 'tower' (a minimal sketch
# follows after this list).
# A tower is specified by two properties:
# * **Scope** - A scope, as provided by `tf.name_scope()`,
# is a means to isolate the operations within a tower.
# For example, all operations within 'tower 0' could have their name prefixed with `tower_0/`.
# * **Device** - A hardware device, as provided by `tf.device()`,
# on which all operations within the tower execute.
# For example, all operations of 'tower 0' could execute on the first GPU `tf.device('/gpu:0')`.
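# Minimal hedged sketch of the tower pattern described above; tower_loss, the sizes,
# and the plain Adam optimizer are placeholders standing in for the real training code.
import tensorflow as tf

num_gpus = 2   # assumed; the corresponding flag is not shown in this snippet

def tower_loss(x):
    # Placeholder per-tower computation: one dense layer and a mean-squared loss.
    w = tf.get_variable("w", shape=[8, 1])
    return tf.reduce_mean(tf.square(tf.matmul(x, w)))

optimizer = tf.train.AdamOptimizer(1e-3)   # stands in for create_optimizer() above
tower_grads = []
for gpu_id in range(num_gpus):
    with tf.device('/gpu:%d' % gpu_id):                  # pin the tower's ops to one device
        with tf.name_scope('tower_%d' % gpu_id):         # isolate the tower's op names
            with tf.variable_scope(tf.get_variable_scope(), reuse=(gpu_id > 0)):
                x = tf.random_normal([4, 8])
                tower_grads.append(optimizer.compute_gradients(tower_loss(x)))
# The per-tower gradients would then be averaged and applied with a single apply_gradients call.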
def highway(self, input_1, input_2, size_1, size_2, l2_penalty=1e-8, layer_size=1):
output = input_2
for idx in range(layer_size):
with tf.name_scope('output_lin_%d' % idx):
W = tf.Variable(tf.truncated_normal([size_2,size_1], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(W))
tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(b))
output = tf.nn.relu(tf.nn.xw_plus_b(output,W,b))
with tf.name_scope('transform_lin_%d' % idx):
W = tf.Variable(tf.truncated_normal([size_1,size_1], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(W))
tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(b))
transform_gate = tf.sigmoid(tf.nn.xw_plus_b(input_1,W,b))
carry_gate = tf.constant(1.0) - transform_gate
output = transform_gate * output + carry_gate * input_1
return output
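# Added toy check (NumPy, not from the original project) of the highway gating above:
# the layer outputs transform_gate * H(x) + (1 - transform_gate) * x, so a gate of 0
# passes the input through unchanged and a gate of 1 keeps only the transformed value.
import numpy as np

x = np.array([1.0, 2.0, 3.0])
h = np.tanh(x)                              # stands in for the transformed output H(x)
transform_gate = np.array([0.0, 0.5, 1.0])
carry_gate = 1.0 - transform_gate
output = transform_gate * h + carry_gate * x
print(output)                               # [1.0, ~1.48, ~0.995]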
def calculate_loss_distill_boost(self, predictions, labels_distill, labels, **unused_params):
with tf.name_scope("loss_distill_boost"):
print("loss_distill_boost")
epsilon = 10e-6
float_labels = tf.cast(labels, tf.float32)
batch_size = tf.shape(float_labels)[0]
float_labels_distill = tf.cast(labels_distill, tf.float32)
error = tf.negative(float_labels * tf.log(float_labels_distill + epsilon) + (
1 - float_labels) * tf.log(1 - float_labels_distill + epsilon))
error = tf.reduce_sum(error,axis=1,keep_dims=True)
alpha = error / tf.reduce_sum(error) * tf.cast(batch_size,dtype=tf.float32)
alpha = tf.clip_by_value(alpha, 0.5, 5)
alpha = alpha / tf.reduce_sum(alpha) * tf.cast(batch_size,dtype=tf.float32)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss * alpha)
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
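# Added numeric illustration (NumPy, values made up) of the alpha re-weighting above:
# per-example errors are scaled to mean 1, clipped to [0.5, 5], then rescaled to mean 1,
# so hard examples (large distillation error) get boosted weights.
import numpy as np

error = np.array([0.1, 1.0, 10.0])
alpha = error / error.sum() * len(error)   # scale to mean 1
alpha = np.clip(alpha, 0.5, 5)             # bound the boosting weights
alpha = alpha / alpha.sum() * len(alpha)   # rescale to mean 1 again
print(alpha)                               # ~[0.41, 0.41, 2.19]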
def calculate_loss_distill_relabel(self, predictions, labels_distill, labels, **unused_params):
with tf.name_scope("loss_distill_relabel"):
print("loss_distill_relabel")
epsilon = 10e-6
float_labels = tf.cast(labels, tf.float32)
sum_labels = tf.cast(tf.reduce_sum(float_labels),dtype=tf.int32)
pos_distill, _ = tf.nn.top_k(tf.reshape(labels_distill,[-1]), k=sum_labels)
labels_true = tf.ones(tf.shape(labels))
labels_false = tf.zeros(tf.shape(labels))
labels_add = tf.where(tf.greater_equal(labels_distill, pos_distill[-1]), labels_true, labels_false)
print(labels_add.get_shape().as_list())
float_labels = float_labels+labels_add*(1.0-float_labels)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss)
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def calculate_loss(self, predictions, labels, **unused_params):
bound = FLAGS.softmax_bound
vocab_size_1 = bound
with tf.name_scope("loss_softmax"):
epsilon = 10e-8
float_labels = tf.cast(labels, tf.float32)
labels_1 = float_labels[:,:vocab_size_1]
predictions_1 = predictions[:,:vocab_size_1]
cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
labels_2 = float_labels[:,vocab_size_1:]
predictions_2 = predictions[:,vocab_size_1:]
# l1 normalization (labels are no less than 0)
label_rowsum = tf.maximum(
tf.reduce_sum(labels_2, 1, keep_dims=True),
epsilon)
label_append = 1.0-tf.reduce_max(labels_2, 1, keep_dims=True)
norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum),label_append),axis=1)
predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
return tf.reduce_mean(softmax_loss) + cross_entropy_loss
def get_input_evaluation_tensors(reader,
data_pattern,
batch_size=1024):
logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
with tf.name_scope("eval_input"):
files = gfile.Glob(data_pattern)
if not files:
raise IOError("Unable to find the evaluation files.")
logging.info("number of evaluation files: " + str(len(files)))
files.sort()
filename_queue = tf.train.string_input_producer(
files, shuffle=False, num_epochs=1)
eval_data = reader.prepare_reader(filename_queue)
return tf.train.batch(
eval_data,
batch_size=batch_size,
capacity=3 * batch_size,
allow_smaller_final_batch=True,
enqueue_many=True)
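# Hedged usage sketch (not from the original project): tf.train.string_input_producer and
# tf.train.batch are queue-based, so the returned tensors only produce data after the
# queue runners are started; the reader object and file pattern below are assumptions.
eval_tensors = get_input_evaluation_tensors(reader, "eval/*.tfrecord", batch_size=1024)
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())           # needed for the num_epochs counter
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            batch = sess.run(eval_tensors)                # one evaluation batch per call
    except tf.errors.OutOfRangeError:
        pass                                              # the single evaluation epoch is exhausted
    finally:
        coord.request_stop()
        coord.join(threads)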
def calculate_loss(self, predictions, labels, weights=None, **unused_params):
with tf.name_scope("loss_xent"):
epsilon = 10e-6
if FLAGS.label_smoothing:
float_labels = smoothing(labels)
else:
float_labels = tf.cast(labels, tf.float32)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss)
if weights is not None:
print(cross_entropy_loss, weights)
weighted_loss = tf.einsum("ij,i->ij", cross_entropy_loss, weights)
print("create weighted_loss", weighted_loss)
return tf.reduce_mean(tf.reduce_sum(weighted_loss, 1))
else:
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def get_input_evaluation_tensors(reader,
data_pattern,
batch_size=256):
logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
with tf.name_scope("eval_input"):
files = gfile.Glob(data_pattern)
if not files:
print(data_pattern, files)
raise IOError("Unable to find the evaluation files.")
logging.info("number of evaluation files: " + str(len(files)))
files.sort()
filename_queue = tf.train.string_input_producer(
files, shuffle=False, num_epochs=1)
eval_data = reader.prepare_reader(filename_queue)
return tf.train.batch(
eval_data,
batch_size=batch_size,
capacity=3 * batch_size,
allow_smaller_final_batch=True,
enqueue_many=True)
def get_input_evaluation_tensors(reader,
data_pattern,
batch_size=256):
logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
with tf.name_scope("eval_input"):
files = gfile.Glob(data_pattern)
if not files:
print(data_pattern, files)
raise IOError("Unable to find the evaluation files.")
logging.info("number of evaluation files: " + str(len(files)))
files.sort()
filename_queue = tf.train.string_input_producer(
files, shuffle=False, num_epochs=1)
eval_data = reader.prepare_reader(filename_queue)
return tf.train.batch(
eval_data,
batch_size=batch_size,
capacity=3 * FLAGS.batch_size,
allow_smaller_final_batch=True,
enqueue_many=True)
def get_input_data_tensors(reader,
data_pattern,
batch_size=256):
logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
with tf.name_scope("eval_input"):
files = gfile.Glob(data_pattern)
if not files:
raise IOError("Unable to find the evaluation files.")
logging.info("number of evaluation files: " + str(len(files)))
files.sort()
filename_queue = tf.train.string_input_producer(
files, shuffle=False, num_epochs=1)
eval_data = reader.prepare_reader(filename_queue)
return tf.train.batch(
eval_data,
batch_size=batch_size,
capacity=4 * batch_size,
allow_smaller_final_batch=True,
enqueue_many=True)
def get_input_data_tensors(reader,
data_pattern,
batch_size=256,
num_epochs=None):
logging.info("Using batch size of " + str(batch_size) + " for training.")
with tf.name_scope("train_input"):
files = gfile.Glob(data_pattern)
if not files:
raise IOError("Unable to find training files. data_pattern='" +
data_pattern + "'.")
logging.info("Number of training files: %s.", str(len(files)))
files.sort()
filename_queue = tf.train.string_input_producer(
files, num_epochs=num_epochs, shuffle=False)
training_data = reader.prepare_reader(filename_queue)
return tf.train.batch(
training_data,
batch_size=batch_size,
capacity=FLAGS.batch_size * 4,
allow_smaller_final_batch=True,
enqueue_many=True)
def get_input_data_tensors(reader,
data_pattern,
batch_size=256):
logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
with tf.name_scope("eval_input"):
files = gfile.Glob(data_pattern)
if not files:
raise IOError("Unable to find the evaluation files.")
logging.info("number of evaluation files: " + str(len(files)))
files.sort()
filename_queue = tf.train.string_input_producer(
files, shuffle=False, num_epochs=1)
eval_data = reader.prepare_reader(filename_queue)
return tf.train.batch(
eval_data,
batch_size=batch_size,
capacity=3 * batch_size,
allow_smaller_final_batch=True,
enqueue_many=True)
def get_input_data_tensors(reader,
data_pattern,
batch_size=256):
logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
with tf.name_scope("eval_input"):
files = gfile.Glob(data_pattern)
if not files:
raise IOError("Unable to find the evaluation files.")
logging.info("number of evaluation files: " + str(len(files)))
files.sort()
filename_queue = tf.train.string_input_producer(
files, shuffle=False, num_epochs=1)
eval_data = reader.prepare_reader(filename_queue)
return tf.train.batch(
eval_data,
batch_size=batch_size,
capacity=batch_size,
allow_smaller_final_batch=True,
enqueue_many=True)
def baseline_forward(self, X, size, n_class):
shape = X.get_shape()
_X = tf.transpose(X, [1, 0, 2]) # batch_size x sentence_length x word_length -> sentence_length x batch_size x word_length
_X = tf.reshape(_X, [-1, int(shape[2])]) # (batch_size x sentence_length) x word_length
seq = tf.split(0, int(shape[1]), _X) # sentence_length x (batch_size x word_length)
with tf.name_scope("LSTM"):
lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
outputs, states = rnn.rnn(lstm_cell, seq, dtype=tf.float32)
with tf.name_scope("LSTM-Classifier"):
W = tf.Variable(tf.random_normal([size, n_class]), name="W")
b = tf.Variable(tf.random_normal([n_class]), name="b")
output = tf.matmul(outputs[-1], W) + b
return output
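# Note: baseline_forward above uses the pre-1.0 API (tf.split(0, n, x), rnn.rnn,
# rnn_cell.BasicLSTMCell). A hedged TF 1.x equivalent with dynamic_rnn might look like
# this; shapes and names are assumptions, not the original code.
import tensorflow as tf

def baseline_forward_v1(X, size, n_class):
    # X: [batch_size, sentence_length, word_length]
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
    with tf.name_scope("LSTM"):
        outputs, states = tf.nn.dynamic_rnn(lstm_cell, X, dtype=tf.float32)
    with tf.name_scope("LSTM-Classifier"):
        W = tf.Variable(tf.random_normal([size, n_class]), name="W")
        b = tf.Variable(tf.random_normal([n_class]), name="b")
        return tf.matmul(outputs[:, -1, :], W) + b       # logits from the last time step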
def cross_entropy_sequence_loss(logits, targets, sequence_length):
"""Calculates the per-example cross-entropy loss for a sequence of logits and
masks out all losses passed the sequence length.
Args:
logits: Logits of shape `[T, B, vocab_size]`
targets: Target classes of shape `[T, B]`
sequence_length: An int32 tensor of shape `[B]` corresponding
to the length of each input
Returns:
A tensor of shape [T, B] that contains the loss per example, per time step.
"""
with tf.name_scope("cross_entropy_sequence_loss"):
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=targets)
# Mask out the losses we don't care about
loss_mask = tf.sequence_mask(
tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])
return losses
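# Hedged usage example with toy shapes (T=3 time steps, B=2 sequences, vocab_size=5);
# the values are illustrative only. Steps beyond an example's sequence_length are masked to 0.
import tensorflow as tf

logits = tf.random_normal([3, 2, 5])                 # [T, B, vocab_size]
targets = tf.constant([[1, 2], [0, 4], [3, 1]])      # [T, B]
sequence_length = tf.constant([3, 2])                # second sequence is padded after step 2
losses = cross_entropy_sequence_loss(logits, targets, sequence_length)
with tf.Session() as sess:
    print(sess.run(losses))                          # shape (3, 2); losses[2, 1] == 0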
def _build_graph(self):
config = self.config
config.fast_test = False
eval_config = Config(clone=config)
eval_config.batch_size = 1
initializer = self.model_initializer
with tf.name_scope("Train"):
with tf.variable_scope("Model", reuse=False, initializer=initializer):
self.train_model = self.Model(config=config, is_training=True, loss_fct=self.loss_fct)
tf.summary.scalar("Training Loss", self.train_model.cost)
tf.summary.scalar("Learning Rate", self.train_model.lr)
with tf.name_scope("Valid"):
with tf.variable_scope("Model", reuse=True, initializer=initializer):
self.validation_model = self.Model(config=config, is_training=False, loss_fct="softmax")
tf.summary.scalar("Validation Loss", self.validation_model.cost)
with tf.name_scope("Test"):
with tf.variable_scope("Model", reuse=True, initializer=initializer):
self.test_model = self.Model(config=eval_config, is_training=False)
def get_training_tensors(self, learning_rate = 0.001, grad_clip = 5):
#-----------------------------------------------------------------------
# Build a loss function
#-----------------------------------------------------------------------
with tf.name_scope('targets-encode'):
y_one_hot = tf.one_hot(self.targets, self.n_classes)
y_reshaped = tf.reshape(y_one_hot, self.logits.get_shape())
with tf.name_scope('loss'):
loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
labels=y_reshaped)
loss = tf.reduce_mean(loss)
tf.summary.scalar('loss', loss)
#-----------------------------------------------------------------------
# Build the optimizer
#-----------------------------------------------------------------------
with tf.name_scope('optimizer'):
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
grad_clip)
train_op = tf.train.AdamOptimizer(learning_rate)
optimizer = train_op.apply_gradients(zip(grads, tvars))
return loss, optimizer
def get_optimizer(self, learning_rate = 0.001):
with tf.name_scope('loss'):
input_shape = tf.shape(self.inputs)
ones = tf.ones([input_shape[0], input_shape[1]])
loss = tf.contrib.seq2seq.sequence_loss(self.logits, self.targets,
ones)
#-----------------------------------------------------------------------
# Build the optimizer
#-----------------------------------------------------------------------
with tf.name_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(learning_rate)
gradients = optimizer.compute_gradients(loss)
capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) \
for grad, var in gradients if grad is not None]
optimizer_op = optimizer.apply_gradients(capped_gradients)
return optimizer_op, loss