def mixture_loss(pred, y, n_mixtures, batch_size):
    # Negative log-likelihood of y under a mixture of bivariate Gaussians, summed over the batch.
    pred = tf.verify_tensor_all_finite(pred, "Pred not finite!")
    out_pi, out_sigma, out_mu, out_rho = splitMix(pred, n_mixtures, batch_size)
    result_binorm, result_delta = tf_bivariate_normal(y, out_mu, out_sigma, out_rho, n_mixtures, batch_size)
    result_binorm = tf.verify_tensor_all_finite(result_binorm, "Result not finite1!")
    result_weighted = tf.mul(result_binorm, out_pi)
    result_weighted = tf.verify_tensor_all_finite(result_weighted, "Result not finite2!")
    result_raw = tf.reduce_sum(result_weighted + epsilon, 1, keep_dims=True)
    result_raw = tf.Print(result_raw, [tf.reduce_sum(result_raw)], "Sum of weighted density. If zero, sigma is too small: ")
    result_raw = tf.Print(result_raw, [tf.reduce_max(result_raw)], "Max of weighted density. If zero, sigma is too small: ")
    result_raw = tf.verify_tensor_all_finite(result_raw, "Result not finite3!")
    result = -tf.log(result_raw + e)
    result = tf.verify_tensor_all_finite(result, "Result not finite4!")
    result = tf.reduce_sum(result)
    result = tf.verify_tensor_all_finite(result, "Result not finite5!")
    return result
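The constants epsilon and e are small module-level stabilizers defined outside this snippet. The quantity being minimized is the usual mixture-density-network negative log-likelihood:

\mathcal{L} = -\sum_{b=1}^{B}\log\!\left(\sum_{k=1}^{K}\pi_{b,k}\,\mathcal{N}\!\bigl(y_b \mid \mu_{b,k},\,\sigma_{b,k},\,\rho_{b,k}\bigr)+\varepsilon\right)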
# Returns the LSTM stack created based on the parameters.
# Processes several batches at once.
# Input shape is: (parameters['batch_size'], parameters['n_steps'], parameters['n_input'])
def add_softmax(self):
    """Adds a softmax operation to this model"""
    with tf.variable_scope(self._get_layer_str()):
        this_input = tf.square(self.get_output())
        reduction_indices = list(range(1, len(this_input.get_shape())))
        acc = tf.reduce_sum(this_input, reduction_indices=reduction_indices, keep_dims=True)
        out = this_input / (acc + FLAGS.epsilon)
        #out = tf.verify_tensor_all_finite(out, "add_softmax failed; is sum equal to zero?")
        self.outputs.append(out)
    return self
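Despite the name, these add_softmax variants do not exponentiate: they square the activations and divide by their sum over all non-batch axes,

\mathrm{out}_i = \frac{x_i^{2}}{\sum_j x_j^{2} + \varepsilon}

which still yields non-negative values summing to (almost) 1; FLAGS.epsilon guards against an all-zero input.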
def add_softmax(self):
    """Adds a softmax operation to this model"""
    this_input = tf.square(self.get_output())
    reduction_indices = list(range(1, len(this_input.get_shape())))
    acc = tf.reduce_sum(this_input, reduction_indices=reduction_indices, keep_dims=True)
    out = this_input / (acc + FLAGS.epsilon)
    #out = tf.verify_tensor_all_finite(out, "add_softmax failed; is sum equal to zero?")
    self.outputs.append(out)
    return self
def dOmega_dWrec(self):
    # states in shape timesteps, batch, n_rec
    states = self.states
    dxt_list = tf.gradients(self.error, states)
    #dxt_list[0] = tf.Print(dxt_list[0], [dxt_list[0]], "dxt 0: ")
    test = tf.gradients(states[0], states[-1])

    dxt = tf.stack(dxt_list)
    xt = tf.stack(states)

    num = (1 - self.alpha) * dxt + tf.tensordot(self.alpha * dxt,
                                                tf.transpose(
                                                    tf.matmul(tf.abs(self.W_rec) * self.rec_Connectivity, self.Dale_rec)),
                                                axes=1) * \
          tf.where(tf.greater(xt, 0), tf.ones_like(xt), tf.zeros_like(xt))
    denom = dxt

    # sum over hidden units
    num = tf.reduce_sum(tf.square(num), axis=2)
    denom = tf.reduce_sum(tf.square(denom), axis=2)

    bounded = tf.where(tf.greater(denom, 1e-20), tf.div(num, 1.0 * denom), tf.ones_like(num))
    nelems = tf.reduce_mean(tf.where(tf.greater(denom, 1e-20), 1.0 * tf.ones_like(num), 1.0 * tf.zeros_like(num)), axis=1)

    # sum mean over each batch by time steps
    Omega = tf.square(bounded - 1.0)
    Omega = tf.reduce_sum(tf.reduce_mean(Omega, axis=1)) / (1.0 * tf.reduce_sum(nelems))

    out = tf.gradients(Omega, self.W_rec)
    out[0] = tf.Print(out[0], [out[0], self.W_rec, Omega], "omega grads")
    out[0] = tf.verify_tensor_all_finite(out[0], "dead omega grad")
    return out, test
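Reading the graph above: dxt holds the error gradient with respect to each hidden state, num pushes that gradient back one step through the leaky-integrator/ReLU recurrent update, and Omega penalizes how far the ratio of their squared norms drifts from 1, averaged over the batch and over the N timesteps whose denominator exceeds 1e-20. Up to the indexing convention and the use of squared rather than plain norms, this is the vanishing-gradient regularizer of Pascanu et al. (2013):

\Omega = \frac{1}{N}\sum_{t}\left(\frac{\bigl\lVert \frac{\partial E}{\partial x_{t+1}}\,\frac{\partial x_{t+1}}{\partial x_{t}}\bigr\rVert^{2}}{\bigl\lVert \frac{\partial E}{\partial x_{t+1}}\bigr\rVert^{2}} - 1\right)^{2}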
def build_summary_op(self):
    cfg = self.config
    self.saver = tf.train.Saver(max_to_keep=5)
    self.summary_writer = tf.summary.FileWriter(
        cfg['log/dir'], self.session.graph, flush_secs=2)
    assert_op = tf.verify_tensor_all_finite(self.elbo_sum, 'ELBO check')
    with tf.control_dependencies([assert_op]):
        self.summary_op = tf.summary.merge_all()
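Because summary_op is created under control_dependencies([assert_op]), fetching it forces the ELBO finiteness check to run first, so a NaN or Inf ELBO raises an InvalidArgumentError instead of being silently written to TensorBoard. A minimal usage sketch; the feed dict and the `step` counter are assumed names, not part of the snippet:

# Hypothetical training-loop excerpt.
summary = model.session.run(model.summary_op, feed_dict=feed)   # ELBO finiteness assert runs first
model.summary_writer.add_summary(summary, global_step=step)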
def tf_bivariate_normal(y, mu, sigma, rho, n_mixtures, batch_size):
    mu = tf.verify_tensor_all_finite(mu, "Mu not finite!")
    y = tf.verify_tensor_all_finite(y, "Y not finite!")
    delta = tf.sub(tf.tile(tf.expand_dims(y, 1), [1, n_mixtures, 1]), mu)
    delta = tf.verify_tensor_all_finite(delta, "Delta not finite!")
    sigma = tf.verify_tensor_all_finite(sigma, "Sigma not finite!")
    s = tf.reduce_prod(sigma, 2)
    s = tf.verify_tensor_all_finite(s, "S not finite!")
    # -1 <= rho <= 1
    z = tf.reduce_sum(tf.square(tf.div(delta, sigma + epsilon) + epsilon), 2) - \
        2 * tf.div(tf.mul(rho, tf.reduce_prod(delta, 2)), s + epsilon)
    z = tf.verify_tensor_all_finite(z, "Z not finite!")
    # 0 < negRho <= 1
    rho = tf.verify_tensor_all_finite(rho, "rho in bivariate normal not finite!")
    negRho = tf.clip_by_value(1 - tf.square(rho), epsilon, 1.0)
    negRho = tf.verify_tensor_all_finite(negRho, "negRho not finite!")
    # Note that if negRho goes near zero, or z goes really large, this explodes.
    negRho = tf.verify_tensor_all_finite(negRho, "negRho in bivariate normal not finite!")
    result = tf.clip_by_value(tf.exp(tf.div(-z, 2 * negRho)), 1.0e-8, 1.0e8)
    result = tf.verify_tensor_all_finite(result, "Result in bivariate normal not finite!")
    denom = 2 * np.pi * tf.mul(s, tf.sqrt(negRho))
    denom = tf.verify_tensor_all_finite(denom, "Denom in bivariate normal not finite!")
    result = tf.clip_by_value(tf.div(result, denom + epsilon), epsilon, 1.0)
    result = tf.verify_tensor_all_finite(result, "Result2 in bivariate normal not finite!")
    return result, delta
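For reference, the density being assembled is the standard correlated bivariate Gaussian, with Delta = y - mu and s = sigma_1 * sigma_2; the clipping and epsilon terms above only guard against overflow and division by zero:

\mathcal{N}(y \mid \mu, \sigma, \rho) = \frac{1}{2\pi\,\sigma_1\sigma_2\sqrt{1-\rho^2}}\exp\!\left(-\frac{z}{2(1-\rho^2)}\right),
\qquad z = \frac{\Delta_1^2}{\sigma_1^2} + \frac{\Delta_2^2}{\sigma_2^2} - \frac{2\rho\,\Delta_1\Delta_2}{\sigma_1\sigma_2}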
def __init__(self, rnn_states, type_embedder, name='DelexicalizedDynamicPredicateEmbedder'):
    """Construct DelexicalizedDynamicPredicateEmbedder.

    Args:
        rnn_states (SequenceBatch): of shape (num_contexts, seq_length, rnn_state_dim)
        type_embedder (TokenEmbedder)
        name (str)
    """
    self._type_embedder = type_embedder

    with tf.name_scope(name):
        # column indices of rnn_states (indexes time)
        self._col_indices = FeedSequenceBatch()  # (num_predicates, max_predicate_mentions)

        # row indices of rnn_states (indexes utterance)
        self._row_indices = tf.placeholder(dtype=tf.int32, shape=[None])  # (num_predicates,)
        row_indices_expanded = expand_dims_for_broadcast(self._row_indices, self._col_indices.values)

        # (num_predicates, max_predicate_mentions, rnn_state_dim)
        rnn_states_selected = SequenceBatch(
            gather_2d(rnn_states.values, row_indices_expanded, self._col_indices.values),
            self._col_indices.mask)

        # (num_predicates, rnn_state_dim)
        rnn_embeds = reduce_mean(rnn_states_selected, allow_empty=True)
        rnn_embeds = tf.verify_tensor_all_finite(rnn_embeds, "RNN-state-based embeddings")

        self._type_seq_embedder = MeanSequenceEmbedder(type_embedder.embeds, name='TypeEmbedder')
        self._embeds = tf.concat(1, [rnn_embeds, self._type_seq_embedder.embeds])
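The project helper gather_2d is not shown in this collection. A hypothetical sketch of what it presumably does, given the shape comments above (pick tensor[row, col] for every aligned index pair, yielding a (num_predicates, max_predicate_mentions, rnn_state_dim) tensor); the real helper may be implemented differently:

# Hypothetical sketch only; not part of the original snippet.
def gather_2d_sketch(tensor, row_indices, col_indices):
    indices = tf.stack([row_indices, col_indices], axis=-1)   # (..., 2) index pairs
    return tf.gather_nd(tensor, indices)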
def embed(sequence_batch, embeds):
    mask = sequence_batch.mask
    embedded_values = tf.gather(embeds, sequence_batch.values)
    embedded_values = tf.verify_tensor_all_finite(embedded_values, 'embedded_values')
    # set all pad embeddings to zero
    broadcasted_mask = expand_dims_for_broadcast(mask, embedded_values)
    embedded_values *= broadcasted_mask
    return SequenceBatch(embedded_values, mask)
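The masking step relies on a shape trick: the (batch, seq_len) float mask gets a trailing singleton axis so it broadcasts against the (batch, seq_len, embed_dim) embeddings, zeroing every padding position. A minimal self-contained illustration of the same idea (expand_dims_for_broadcast and SequenceBatch are project helpers not shown here):

import tensorflow as tf

ids = tf.constant([[4, 7, 0], [2, 0, 0]])             # (batch=2, seq_len=3); 0 is the pad id here
mask = tf.constant([[1., 1., 0.], [1., 0., 0.]])       # 1.0 for real tokens, 0.0 for padding
embeds = tf.random_normal([10, 5])                     # vocab_size=10, embed_dim=5

vals = tf.gather(embeds, ids)                          # (2, 3, 5)
vals = vals * tf.expand_dims(mask, -1)                 # broadcast: pad positions become zero vectors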
def RNN(parameters, input, model, initial_state):
    # The model is:
    # 1. input
    # 2. linear layer
    # 3 - n. LSTM layers
    # n+1. linear layer
    # n+2. output
    input = tf.verify_tensor_all_finite(input, "Input not finite!")
    # input shape: (batch_size, n_steps, n_input)
    input = tf.transpose(input, [1, 0, 2])  # permute n_steps and batch_size
    input = tf.verify_tensor_all_finite(input, "Input not finite2!")

    # Reshape to prepare input to the linear layer
    input = tf.reshape(input, [-1, parameters['n_input']])  # (n_steps*batch_size, n_input)
    input = tf.verify_tensor_all_finite(input, "Input not finite3!")

    # 1. layer, linear activation for each batch and step.
    if 'input_weights' in model:
        input = tf.matmul(input, model['input_weights']) + model['input_bias']
        # input = tf.nn.dropout(input, model['keep_prob'])

    # Split data because the rnn cell needs a list of inputs for the RNN inner loop,
    # that is, an n_steps-length list of tensors shaped (batch_size, n_inputs).
    # This is not well documented, but check for yourself here: https://goo.gl/NzA5pX
    input = tf.split(0, parameters['n_steps'], input)  # n_steps * (batch_size, :)

    initial_state = tf.verify_tensor_all_finite(initial_state, "Initial state not finite!")
    # Note: States is shaped: batch_size x cell.state_size
    outputs, states = rnn.rnn(model['rnn_cell'], input, initial_state=initial_state)
    #outputs[-1] = tf.Print(outputs[-1], [outputs[-1]], "LSTM Output: ", summarize = 100)
    lastOutput = tf.verify_tensor_all_finite(outputs[-1], "LSTM Outputs not finite!")
    #lastOutput = tf.nn.dropout(lastOutput, model['keep_prob'])

    # Only the last output is interesting for error back propagation and prediction.
    # Note that all batches are handled together here.
    raw_output = tf.matmul(lastOutput, model['output_weights']) + model['output_bias']
    raw_output = tf.verify_tensor_all_finite(raw_output, "Raw output not finite!")

    n_mixtures = parameters['n_mixtures']
    batch_size = parameters['batch_size']

    # And now, instead of just outputting the expected value, we output mixture distributions.
    # The number of mixtures is intuitively the number of possible actions the target can take.
    # The output is divided into groups of n_mixtures mixture parameters for the 2 absolute position
    # coordinates (per mixture: weight, 2 means, 2 standard deviations, and correlation).
    output = softmax_mixtures(raw_output, n_mixtures, batch_size)
    #output = tf.Print(output, [output], "Output: ", summarize = 100)
    output = tf.verify_tensor_all_finite(output, "Final output not finite!")

    return (output, states)
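softmax_mixtures and splitMix are not included in this collection. A hypothetical sketch of the splitting step, assuming the raw output packs the parameters as contiguous blocks of n_mixtures columns in the order suggested by the "6 = 2 sigma, 2 mean, weight, rho" comment in create_generative below (the real layout may differ):

# Hypothetical sketch only; the real splitMix is not part of this snippet.
def split_mix_sketch(raw_output, n_mixtures):
    # raw_output: (batch_size, 6 * n_mixtures), assumed column layout
    # [pi | sigma_x | sigma_y | mu_x | mu_y | rho], n_mixtures columns each.
    pi, sigma_x, sigma_y, mu_x, mu_y, rho = tf.split(1, 6, raw_output)   # old tf.split(axis, num, value)
    return pi, (sigma_x, sigma_y), (mu_x, mu_y), rho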
# Returns the generative LSTM stack created based on the parameters.
# Processes one input at a time.
# Input shape is: 1 x (parameters['n_input'])
# State shape is: 1 x (parameters['n_input'])
def create_generative(parameters):
    print('Creating the neural network model.')

    tf.reset_default_graph()
    # tf Graph input
    x = tf.placeholder(tf.float32, shape=(1, parameters['n_input']), name='input')
    x = tf.verify_tensor_all_finite(x, "X not finite!")
    y = tf.placeholder(tf.float32, shape=(1, parameters['n_output']), name='expected_output')
    y = tf.verify_tensor_all_finite(y, "Y not finite!")
    x = tf.Print(x, [x], "X: ")
    y = tf.Print(y, [y], "Y: ")
    lstm_state_size = np.sum(parameters['lstm_layers']) * 2
    # Note: Batch size is the first dimension in istate.
    istate = tf.placeholder(tf.float32, shape=(None, lstm_state_size), name='internal_state')
    lr = tf.placeholder(tf.float32, name='learning_rate')

    # The target to track itself and its peers, each with x, y ## and velocity x and y.
    input_size = (parameters['n_peers'] + 1) * 2
    inputToRnn = parameters['input_layer']
    if parameters['input_layer'] is None:
        inputToRnn = parameters['n_input']

    cells = [rnn_cell.LSTMCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn,
                               num_proj=parameters['lstm_layers'][i],
                               cell_clip=parameters['lstm_clip'],
                               use_peepholes=True) for i, l in enumerate(parameters['lstm_layers'])]
    # TODO: GRUCell support here.
    # cells = [rnn_cell.GRUCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn) for i, l in enumerate(parameters['lstm_layers'])]

    model = {
        'input_weights': tf.Variable(tf.random_normal(
            [input_size, parameters['input_layer']]), name='input_weights'),
        'input_bias': tf.Variable(tf.random_normal([parameters['input_layer']]), name='input_bias'),
        'output_weights': tf.Variable(tf.random_normal([parameters['lstm_layers'][-1],
                                                         # 6 = 2 sigma, 2 mean, weight, rho
                                                         parameters['n_mixtures'] * 6]),
                                      name='output_weights'),
        # We need to put at least the standard deviation output biases to about 5 to prevent zeros and infinities.
        # , mean = 5.0, stddev = 3.0
        'output_bias': tf.Variable(tf.random_normal([parameters['n_mixtures'] * 6]),
                                   name='output_bias'),
        'rnn_cell': rnn_cell.MultiRNNCell(cells),
        'lr': lr,
        'x': x,
        'y': y,
        'keep_prob': tf.placeholder(tf.float32),
        'istate': istate
    }

    # The next variables need to be remapped, because we don't have RNN context anymore:
    # RNN/MultiRNNCell/Cell0/LSTMCell/ -> MultiRNNCell/Cell0/LSTMCell/
    # B, W_F_diag, W_O_diag, W_I_diag, W_0
    with tf.variable_scope("RNN"):
        pred = RNN_generative(parameters, x, model, istate)
    model['pred'] = pred[0]
    model['last_state'] = pred[1]
    return model
def __init__(self, simple_scorer, attention_scorer, soft_copy_scorer):
    """
    Args:
        simple_scorer (SimplePredicateScorer)
        attention_scorer (AttentionPredicateScorer)
        soft_copy_scorer (SoftCopyPredicateScorer)
    """
    assert isinstance(simple_scorer, SimplePredicateScorer)
    assert isinstance(attention_scorer, AttentionPredicateScorer)
    assert isinstance(soft_copy_scorer, SoftCopyPredicateScorer)

    simple_scores = simple_scorer.scores  # (batch_size, num_candidates)
    attention_scores = attention_scorer.scores  # (batch_size, num_candidates)
    soft_copy_scores = soft_copy_scorer.scores  # (batch_size, num_candidates)

    # check that Tensors are finite
    def verify_finite_inside_mask(scores, msg):
        finite_scores = scores.with_pad_value(0).values
        assert_op = tf.verify_tensor_all_finite(finite_scores, msg)
        return assert_op

    with tf.control_dependencies([
        verify_finite_inside_mask(simple_scores, 'simple_scores'),
        verify_finite_inside_mask(attention_scores, 'attention_scores'),
        verify_finite_inside_mask(soft_copy_scores, 'soft copy scores'),
    ]):
        scores = SequenceBatch(
            simple_scores.values + attention_scores.values + soft_copy_scores.values,
            simple_scores.mask)

    subscores = SequenceBatch(
        tf.pack(
            [simple_scores.values, attention_scores.values, soft_copy_scores.values],
            axis=2),
        simple_scores.mask)

    scores = scores.with_pad_value(-float('inf'))
    probs = SequenceBatch(tf.nn.softmax(scores.values), scores.mask)

    self._scores = scores
    self._subscores = subscores
    self._probs = probs

    self._simple_scorer = simple_scorer
    self._attention_scorer = attention_scorer
    self._soft_copy_scorer = soft_copy_scorer
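Because the three score tensors are added before the softmax, the resulting distribution over candidates is a product of (unnormalized) experts, and the -inf pad value drives padded candidates to probability zero:

p(i) = \mathrm{softmax}_i\bigl(s^{\mathrm{simple}}_i + s^{\mathrm{att}}_i + s^{\mathrm{copy}}_i\bigr)
     \;\propto\; e^{s^{\mathrm{simple}}_i}\, e^{s^{\mathrm{att}}_i}\, e^{s^{\mathrm{copy}}_i}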
def loss(self, y_true, y_pred, mean=True):
    scale_factor = self.scale_factor
    eps = self.eps

    with tf.name_scope(self.scope):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32) * scale_factor

        if self.masking:
            nelem = _nelem(y_true)
            y_true = _nan2zero(y_true)

        # Clip theta
        theta = tf.minimum(self.theta, 1e6)

        t1 = tf.lgamma(theta+eps) + tf.lgamma(y_true+1.0) - tf.lgamma(y_true+theta+eps)
        t2 = (theta+y_true) * tf.log(1.0 + (y_pred/(theta+eps))) + (y_true * (tf.log(theta+eps) - tf.log(y_pred+eps)))

        if self.debug:
            assert_ops = [
                tf.verify_tensor_all_finite(y_pred, 'y_pred has inf/nans'),
                tf.verify_tensor_all_finite(t1, 't1 has inf/nans'),
                tf.verify_tensor_all_finite(t2, 't2 has inf/nans')]

            tf.summary.histogram('t1', t1)
            tf.summary.histogram('t2', t2)

            with tf.control_dependencies(assert_ops):
                final = t1 + t2
        else:
            final = t1 + t2

        final = _nan2inf(final)

        if mean:
            if self.masking:
                final = tf.divide(tf.reduce_sum(final), nelem)
            else:
                final = tf.reduce_mean(final)

    return final
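For reference, t1 + t2 above is the negative log-likelihood of a negative binomial distribution with mean mu = y_pred (after scaling) and inverse dispersion theta; the eps terms only stabilize the logarithms and gamma functions:

-\log \mathrm{NB}(y \mid \mu, \theta)
  = \log\Gamma(\theta) + \log\Gamma(y+1) - \log\Gamma(y+\theta)
  + (\theta + y)\log\!\left(1 + \frac{\mu}{\theta}\right)
  + y\bigl(\log\theta - \log\mu\bigr)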