def finalize(self, outputs: BeamSearchOptimizationDecoderOutput, final_state: BeamSearchOptimizationDecoderState, sequence_lengths):
# all output fields are [max_time, batch_size, ...]
predicted_ids = tf.contrib.seq2seq.gather_tree(
outputs.predicted_ids, outputs.parent_ids,
sequence_length=sequence_lengths, name='predicted_ids')
total_loss = tf.reduce_sum(outputs.loss, axis=0, name='violation_loss')
predicted_time = tf.shape(predicted_ids)[0]
last_score = predicted_time-1
with tf.name_scope('gold_score'):
gold_score = outputs.gold_score[last_score]
with tf.name_scope('sequence_scores'):
sequence_scores = outputs.scores[last_score]
return FinalBeamSearchOptimizationDecoderOutput(beam_search_decoder_output=outputs,
predicted_ids=predicted_ids,
scores=sequence_scores,
gold_score=gold_score,
gold_beam_id=final_state.gold_beam_id,
num_available_beams=final_state.num_available_beams,
total_violation_loss=total_loss), final_state
# Python tf.reduce_sum() usage examples
# Source: beam_aligner.py, project: almond-nnparser (author: Stanford-Mobisocial-IoT-Lab)
# Source: eval_output_embeddings.py, project: almond-nnparser (author: Stanford-Mobisocial-IoT-Lab)
def bag_of_tokens(config, labels, label_lengths):
if config.train_output_embeddings:
with tf.variable_scope('embed', reuse=True):
output_embeddings = tf.get_variable('output_embedding')
else:
output_embeddings = tf.constant(config.output_embedding_matrix)
#everything_label_placeholder = tf.placeholder(shape=(None, config.max_length,), dtype=tf.int32)
#everything_label_length_placeholder = tf.placeholder(shape=(None,), dtype=tf.int32)
labels = tf.constant(np.array(labels))
embedded_output = tf.gather(output_embeddings, labels)
print('embedded_output before', embedded_output)
#mask = tf.sequence_mask(label_lengths, maxlen=config.max_length, dtype=tf.float32)
# note: this multiplication will broadcast the mask along all elements of the depth dimension
# (which is why we run the expand_dims to choose how to broadcast)
#embedded_output = embedded_output * tf.expand_dims(mask, axis=2)
#print('embedded_output after', embedded_output)
return tf.reduce_sum(embedded_output, axis=1)
def triplet_loss(anchor, positive, negative, alpha):
"""Calculate the triplet loss according to the FaceNet paper
Args:
anchor: the embeddings for the anchor images.
positive: the embeddings for the positive images.
negative: the embeddings for the negative images.
alpha: margin enforced between the positive and negative distances.
Returns:
the triplet loss according to the FaceNet paper as a float tensor.
"""
with tf.variable_scope('triplet_loss'):
pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)
basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha)
loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)
return loss
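A minimal usage sketch for the triplet loss above (my own toy values, assuming TensorFlow 1.x): alpha is the margin by which the anchor-positive distance must beat the anchor-negative distance.

import numpy as np
import tensorflow as tf

# Hypothetical 4-dimensional embeddings for a batch of two triplets.
anchor = tf.constant(np.random.rand(2, 4), dtype=tf.float32)
positive = tf.constant(np.random.rand(2, 4), dtype=tf.float32)
negative = tf.constant(np.random.rand(2, 4), dtype=tf.float32)
loss = triplet_loss(anchor, positive, negative, alpha=0.2)
with tf.Session() as sess:
    print(sess.run(loss))  # mean over the batch of max(pos_dist - neg_dist + alpha, 0)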
def build_model(self):
self.q = tf.placeholder(tf.float32, [self.reader.vocab_size], name="question")
self.a = tf.placeholder(tf.float32, [self.reader.vocab_size], name="answer")
self.build_encoder()
self.build_decoder()
# Kullback Leibler divergence
self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))
# Log likelihood
self.g_loss = tf.reduce_sum(tf.log(self.p_x_i))
self.loss = tf.reduce_mean(self.e_loss + self.g_loss)
self.optim = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(-self.loss)
_ = tf.scalar_summary("encoder loss", self.e_loss)
_ = tf.scalar_summary("decoder loss", self.g_loss)
_ = tf.scalar_summary("loss", self.loss)
def calculate_loss_distill_boost(self, predictions, labels_distill, labels, **unused_params):
with tf.name_scope("loss_distill_boost"):
print("loss_distill_boost")
epsilon = 10e-6
float_labels = tf.cast(labels, tf.float32)
batch_size = tf.shape(float_labels)[0]
float_labels_distill = tf.cast(labels_distill, tf.float32)
error = tf.negative(float_labels * tf.log(float_labels_distill + epsilon) + (
1 - float_labels) * tf.log(1 - float_labels_distill + epsilon))
error = tf.reduce_sum(error,axis=1,keep_dims=True)
alpha = error / tf.reduce_sum(error) * tf.cast(batch_size,dtype=tf.float32)
alpha = tf.clip_by_value(alpha, 0.5, 5)
alpha = alpha / tf.reduce_sum(alpha) * tf.cast(batch_size,dtype=tf.float32)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss * alpha)
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def calculate_loss(self, predictions, labels, **unused_params):
with tf.name_scope("loss_xent"):
epsilon = 10e-6
vocab_size = predictions.get_shape().as_list()[1]
float_labels = tf.cast(labels, tf.float32)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss)
neg_labels = 1 - float_labels
predictions_pos = predictions*float_labels+10*neg_labels
predictions_minpos = tf.reduce_min(predictions_pos,axis=1,keep_dims=True)
predictions_neg = predictions*neg_labels-10*float_labels
predictions_maxneg = tf.reduce_max(predictions_neg,axis=1,keep_dims=True)
mask_1 = tf.cast(tf.greater_equal(predictions_neg, predictions_minpos),dtype=tf.float32)
mask_2 = tf.cast(tf.less_equal(predictions_pos, predictions_maxneg),dtype=tf.float32)
cross_entropy_loss = cross_entropy_loss*(mask_1+mask_2)*10 + cross_entropy_loss
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def calculate_loss(self, predictions, labels, **unused_params):
bound = FLAGS.softmax_bound
vocab_size_1 = bound
with tf.name_scope("loss_softmax"):
epsilon = 10e-8
float_labels = tf.cast(labels, tf.float32)
labels_1 = float_labels[:,:vocab_size_1]
predictions_1 = predictions[:,:vocab_size_1]
cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
labels_2 = float_labels[:,vocab_size_1:]
predictions_2 = predictions[:,vocab_size_1:]
# l1 normalization (labels are no less than 0)
label_rowsum = tf.maximum(
tf.reduce_sum(labels_2, 1, keep_dims=True),
epsilon)
label_append = 1.0-tf.reduce_max(labels_2, 1, keep_dims=True)
norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum),label_append),axis=1)
predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
return tf.reduce_mean(softmax_loss) + cross_entropy_loss
def calculate_loss(self, predictions, labels, weights=None, **unused_params):
with tf.name_scope("loss_xent"):
epsilon = 10e-6
if FLAGS.label_smoothing:
float_labels = smoothing(labels)
else:
float_labels = tf.cast(labels, tf.float32)
cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
1 - float_labels) * tf.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss)
if weights is not None:
print(cross_entropy_loss, weights)
weighted_loss = tf.einsum("ij,i->ij", cross_entropy_loss, weights)
print("create weighted_loss", weighted_loss)
return tf.reduce_mean(tf.reduce_sum(weighted_loss, 1))
else:
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def compute_loss(self, decoder_output, _features, labels):
"""Computes the loss for this model.
Returns a tuple `(losses, loss)`, where `losses` are the per-batch
losses and loss is a single scalar tensor to minimize.
"""
#pylint: disable=R0201
# Calculate loss per example-timestep of shape [B, T]
losses = seq2seq_losses.cross_entropy_sequence_loss(
logits=decoder_output.logits[:, :, :],
targets=tf.transpose(labels["target_ids"][:, 1:], [1, 0]),
sequence_length=labels["target_len"] - 1)
# Calculate the average log perplexity
loss = tf.reduce_sum(losses) / tf.to_float(
tf.reduce_sum(labels["target_len"] - 1))
return losses, loss
def get_dice_coef(logits, labels):
"""Compute dice coefficient.
Args:
logits: Softmax probability applied to fuse layers.
labels: Correct annotations (0 or 1).
Returns:
Mean dice coefficient over full tensor.
Source:
https://github.com/zsdonghao/tensorlayer/blob/master/tensorlayer/cost.py#L125
"""
smooth = 1e-5
inter = tf.reduce_sum(tf.multiply(logits, labels))
l = tf.reduce_sum(logits)
r = tf.reduce_sum(labels)
return tf.reduce_mean((2.0 * inter + smooth) / (l + r + smooth))
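A small check of get_dice_coef (my own toy tensors, assuming TensorFlow 1.x); the coefficient approaches 1.0 as the probabilities line up with the annotations.

logits = tf.constant([[0.9, 0.1, 0.8, 0.2]])  # softmax probabilities
labels = tf.constant([[1.0, 0.0, 1.0, 0.0]])  # correct annotations (0 or 1)
dice = get_dice_coef(logits, labels)
with tf.Session() as sess:
    print(sess.run(dice))  # (2*1.7 + smooth) / (2.0 + 2.0 + smooth) ≈ 0.85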
def __init__(self, embedding):
self.sess = tf.Session()
self.inputs = tf.placeholder(tf.float32,
[None, embedding.shape[1]],
name='inputs')
self.test_vec = tf.placeholder(tf.float32, [1, embedding.shape[1]],
name='test_vec')
self.cos_distance = tf.matmul(self.inputs, tf.transpose(self.test_vec))
#-----------------------------------------------------------------------
# Compute normalized embedding matrix
#-----------------------------------------------------------------------
row_sum = tf.reduce_sum(tf.square(self.inputs), axis=1,
keep_dims=True)
norm = tf.sqrt(row_sum)
self.normalized = self.inputs / norm
self.embedding = self.sess.run(self.normalized,
feed_dict={self.inputs: embedding})
#---------------------------------------------------------------------------
def bin_stats(predictions: tf.Tensor, labels: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
"""
Calculate f1, precision and recall from binary classification expected and predicted values.
:param predictions: 2-d tensor (batch, predictions) of predicted 0/1 classes
:param labels: 2-d tensor (batch, labels) of expected 0/1 classes
:return: a tuple of batched (f1, precision and recall) values
"""
predictions = tf.cast(predictions, tf.int32)
labels = tf.cast(labels, tf.int32)
true_positives = tf.reduce_sum((predictions * labels), axis=1)
false_positives = tf.reduce_sum(tf.cast(tf.greater(predictions, labels), tf.int32), axis=1)
false_negatives = tf.reduce_sum(tf.cast(tf.greater(labels, predictions), tf.int32), axis=1)
recall = true_positives / (true_positives + false_negatives)
precision = true_positives / (true_positives + false_positives)
f1_score = 2 / (1 / precision + 1 / recall)
return f1_score, precision, recall
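A usage sketch for bin_stats (my own toy batch, assuming TensorFlow 1.x): the integer counts are promoted to floats by the true divisions, and the metrics come back per example.

predictions = tf.constant([[1, 0, 1, 1],
                           [0, 1, 0, 0]])
labels = tf.constant([[1, 0, 0, 1],
                      [0, 1, 1, 0]])
f1, precision, recall = bin_stats(predictions, labels)
with tf.Session() as sess:
    print(sess.run([f1, precision, recall]))
    # f1 ≈ [0.8, 0.667], precision = [0.667, 1.0], recall = [1.0, 0.5]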
def bin_dice(predictions: tf.Tensor, labels: tf.Tensor) -> tf.Tensor:
"""
Calculate the Sørensen–Dice coefficient from the given binary classification expected and predicted values.
The coefficient is defined as :math:`2|X \cap Y| / (|X| + |Y|)`.
:param predictions: 2-d tensor (batch, predictions) of predicted 0/1 classes
:param labels: 2-d tensor (batch, labels) of expected 0/1 classes
:return: batched Sørensen–Dice coefficients
"""
predictions = tf.cast(predictions, tf.int32)
labels = tf.cast(labels, tf.int32)
true_positives = tf.reduce_sum((predictions * labels), axis=1)
pred_positives = tf.reduce_sum(predictions, axis=1)
label_positives = tf.reduce_sum(labels, axis=1)
return 2 * true_positives / (pred_positives + label_positives)
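And the same kind of toy check for bin_dice (again my own values):

predictions = tf.constant([[1, 0, 1, 1]])
labels = tf.constant([[1, 0, 0, 1]])
with tf.Session() as sess:
    print(sess.run(bin_dice(predictions, labels)))  # 2*2 / (3 + 2) = 0.8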
def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
def leapfrog(pos, vel, step, i):
de_dp_ = tf.gradients(tf.reduce_sum(energy_fn(pos)), pos)[0]
new_vel_ = vel - step * de_dp_
new_pos_ = pos + step * new_vel_
return [new_pos_, new_vel_, step, tf.add(i, 1)]
def condition(pos, vel, step, i):
return tf.less(i, n_steps)
de_dp = tf.gradients(tf.reduce_sum(energy_fn(initial_pos)), initial_pos)[0]
vel_half_step = initial_vel - 0.5 * stepsize * de_dp
pos_full_step = initial_pos + stepsize * vel_half_step
i = tf.constant(0)
final_pos, new_vel, _, _ = tf.while_loop(condition, leapfrog, [pos_full_step, vel_half_step, stepsize, i])
de_dp = tf.gradients(tf.reduce_sum(energy_fn(final_pos)), final_pos)[0]
final_vel = new_vel - 0.5 * stepsize * de_dp
return final_pos, final_vel
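A sketch of how simulate_dynamics might be driven (my own toy setup, assuming TensorFlow 1.x): the energy function returns one energy value per sample, and the leapfrog integrator follows its gradient.

def quadratic_energy(pos):
    # E(x) = 0.5 * ||x||^2 per sample, so dE/dx = x.
    return 0.5 * tf.reduce_sum(tf.square(pos), axis=1)

initial_pos = tf.random_normal([16, 3])
initial_vel = tf.random_normal([16, 3])
final_pos, final_vel = simulate_dynamics(initial_pos, initial_vel,
                                         stepsize=0.05, n_steps=20,
                                         energy_fn=quadratic_energy)
with tf.Session() as sess:
    p, v = sess.run([final_pos, final_vel])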
def build_model(self):
Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z])
Y = tf.placeholder(tf.float32, [self.batch_size, self.dim_y])
image_real = tf.placeholder(tf.float32, [self.batch_size]+self.image_shape)
h4 = self.generate(Z,Y)
#image_gen comes from sigmoid output of generator
image_gen = tf.nn.sigmoid(h4)
raw_real2 = self.discriminate(image_real, Y)
#p_real = tf.nn.sigmoid(raw_real)
p_real=tf.reduce_mean(raw_real2)
raw_gen2 = self.discriminate(image_gen, Y)
#p_gen = tf.nn.sigmoid(raw_gen)
p_gen = tf.reduce_mean(raw_gen2)
discrim_cost = tf.reduce_sum(raw_real2) - tf.reduce_sum(raw_gen2)
gen_cost = -tf.reduce_mean(raw_gen2)
return Z, Y, image_real, discrim_cost, gen_cost, p_real, p_gen
def categorical_crossentropy_3d(y_true, y_predicted):
"""
Computes categorical cross-entropy loss for a softmax distribution in a hot-encoded 3D array
with shape (num_samples, num_classes, dim1, dim2, dim3)
Parameters
----------
y_true : keras.placeholder [batches, dim0,dim1,dim2]
Placeholder for data holding the ground-truth labels encoded in a one-hot representation
y_predicted : keras.placeholder [batches,channels,dim0,dim1,dim2]
Placeholder for data holding the softmax distribution over classes
Returns
-------
scalar
Categorical cross-entropy loss value
"""
y_true_flatten = K.flatten(y_true)
y_pred_flatten = K.flatten(y_predicted)
y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
num_total_elements = K.sum(y_true_flatten)
# cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
mean_cross_entropy = cross_entropy / (num_total_elements + K.epsilon())
return mean_cross_entropy
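Since the flattened tensors are multiplied elementwise, y_true must in fact be one-hot with the same (num_samples, num_classes, dim1, dim2, dim3) shape as y_predicted. A toy check (my own values, assuming `from keras import backend as K` and NumPy as in the surrounding module):

import numpy as np

y_true = np.zeros((1, 3, 2, 2, 2), dtype=np.float32)
y_true[:, 0, ...] = 1.0                                          # every voxel is class 0
y_pred = np.full((1, 3, 2, 2, 2), 1.0 / 3.0, dtype=np.float32)   # uniform softmax output
loss = categorical_crossentropy_3d(K.constant(y_true), K.constant(y_pred))
print(K.eval(loss))  # ≈ -log(1/3) ≈ 1.0986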
def step_with_training(self, training=None):
def step(inputs, states):
input_shape = K.int_shape(inputs)
y_tm1 = self.layer.preprocess_input(
K.expand_dims(states[0], axis=1),
training
)
y_tm1 = K.reshape(y_tm1, (-1, input_shape[-1]))
inputs_sum = tf.reduce_sum(inputs)
def inputs_f(): return inputs
def output_f(): return y_tm1
current_inputs = tf.case(
[(tf.equal(inputs_sum, 0.0), output_f)],
default=inputs_f
)
return self.layer.step(
current_inputs,
states
)
return step
def l1_regularizer(weight=1.0, scope=None):
"""Define a L1 regularizer.
Args:
weight: scale the loss by this factor.
scope: Optional scope for op_scope.
Returns:
a regularizer function.
"""
def regularizer(tensor):
with tf.op_scope([tensor], scope, 'L1Regularizer'):
l1_weight = tf.convert_to_tensor(weight,
dtype=tensor.dtype.base_dtype,
name='weight')
return tf.mul(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value')
return regularizer
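A usage sketch for the regularizer factory above (my own toy variable; note these helpers target the pre-1.0 TensorFlow API, e.g. tf.mul and tf.op_scope):

weights = tf.Variable(tf.random_normal([10, 10]), name='weights')
l1_reg = l1_regularizer(weight=0.01)
reg_loss = l1_reg(weights)  # 0.01 * sum(|weights|); typically added to the training loss by the caller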
def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None):
"""Define a L1L2 regularizer.
Args:
weight_l1: scale the L1 loss by this factor.
weight_l2: scale the L2 loss by this factor.
scope: Optional scope for op_scope.
Returns:
a regularizer function.
"""
def regularizer(tensor):
with tf.op_scope([tensor], scope, 'L1L2Regularizer'):
weight_l1_t = tf.convert_to_tensor(weight_l1,
dtype=tensor.dtype.base_dtype,
name='weight_l1')
weight_l2_t = tf.convert_to_tensor(weight_l2,
dtype=tensor.dtype.base_dtype,
name='weight_l2')
reg_l1 = tf.mul(weight_l1_t, tf.reduce_sum(tf.abs(tensor)),
name='value_l1')
reg_l2 = tf.mul(weight_l2_t, tf.nn.l2_loss(tensor),
name='value_l2')
return tf.add(reg_l1, reg_l2, name='value')
return regularizer
def l1_loss(tensor, weight=1.0, scope=None):
"""Define a L1Loss, useful for regularize, i.e. lasso.
Args:
tensor: tensor to regularize.
weight: scale the loss by this factor.
scope: Optional scope for op_scope.
Returns:
the L1 loss op.
"""
with tf.op_scope([tensor], scope, 'L1Loss'):
weight = tf.convert_to_tensor(weight,
dtype=tensor.dtype.base_dtype,
name='loss_weight')
loss = tf.mul(weight, tf.reduce_sum(tf.abs(tensor)), name='value')
tf.add_to_collection(LOSSES_COLLECTION, loss)
return loss
def recode_cost(self, inputs, variation, eps=1e-5, **kwargs):
"""
Cost for given input batch of samples, under current params.
"""
h = self.get_h_inputs(inputs)
z_mu = tf.matmul(h, self.params['Mhz']) + self.params['bMhz']
z_sig = tf.matmul(h, self.params['Shz']) + self.params['bShz']
# KL divergence between latent space induced by encoder and ...
lat_loss = -tf.reduce_sum(1 + z_sig - z_mu**2 - tf.exp(z_sig), 1)
z = z_mu + tf.sqrt(tf.exp(z_sig)) * variation
h = self.get_h_latents(z)
x_mu = self.decoding(tf.matmul(h, self.params['Mhx']) + self.params['bMhx'])
x_sig = self.decoding(tf.matmul(h, self.params['Shx']) + self.params['bShx'])
# x_sig = tf.clip_by_value(x_mu * (1 - x_mu), .05, 1)
# decoding likelihood term
like_loss = tf.reduce_sum(tf.log(x_sig + eps) +
(inputs - x_mu)**2 / x_sig, 1)
# # Mean cross entropy between input and encode-decoded input.
# like_loss = 2 * tf.reduce_sum(functions.cross_entropy(inputs, x_mu), 1)
return .5 * tf.reduce_mean(like_loss + lat_loss)
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
num_inputs=df_dim*4
theta = tf.get_variable(name+"/theta",[num_inputs, num_kernels, dim_per_kernel], initializer=tf.random_normal_initializer(stddev=0.05))
log_weight_scale = tf.get_variable(name+"/lws",[num_kernels, dim_per_kernel], initializer=tf.constant_initializer(0.0))
W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale)/tf.sqrt(tf.reduce_sum(tf.square(theta),0)),0))
W = tf.reshape(W,[-1,num_kernels*dim_per_kernel])
x = input
x=tf.reshape(x, [batchsize,num_inputs])
activation = tf.matmul(x, W)
activation = tf.reshape(activation,[-1,num_kernels,dim_per_kernel])
abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation,3),tf.expand_dims(tf.transpose(activation,[1,2,0]),0))),2),
1-tf.expand_dims(tf.constant(np.eye(batchsize),dtype=np.float32),1))
f = tf.reduce_sum(tf.exp(-abs_dif),2)/tf.reduce_sum(tf.exp(-abs_dif))
print(f.get_shape())
print(input.get_shape())
return tf.concat(1,[x, f])
def output_module(self):
"""
1. use an attention mechanism between the query and the hidden states to get a weighted sum of the hidden states. 2. apply a non-linearity to the query and that weighted sum to get the label.
input: query_embedding: [batch_size, embed_size], hidden state: [batch_size, block_size, hidden_size] of memory
:return: y: predicted label, shape [batch_size, vocab_size]
"""
# 1. use attention mechanism between query and hidden states, to get weighted sum of hidden state.
# 1.1 get probability distribution (of similarity)
p=tf.nn.softmax(tf.multiply(tf.expand_dims(self.query_embedding,axis=1),self.hidden_state)) #shape:[batch_size,block_size,hidden_size]<---query_embedding_expand:[batch_size,1,hidden_size]; hidden_state:[batch_size,block_size,hidden_size]
# 1.2 get weighted sum of hidden state
u=tf.reduce_sum(tf.multiply(p,self.hidden_state),axis=1) #shape:[batch_size,hidden_size]<----------([batch_size,block_size,hidden_size],[batch_size,block_size,hidden_size])
# 2.non-linearity of query and hidden state to get label
H_u_matmul=tf.matmul(u,self.H)+self.h_u_bias #shape:[batch_size,hidden_size]<----([batch_size,hidden_size],[hidden_size,hidden_size])
activation=self.activation(self.query_embedding + H_u_matmul,scope="query_add_hidden") #shape:[batch_size,hidden_size]
activation = tf.nn.dropout(activation,keep_prob=self.dropout_keep_prob) #shape:[batch_size,hidden_size]
y=tf.matmul(activation,self.R)+self.y_bias #shape:[batch_size,vocab_size]<-----([batch_size,hidden_size],[hidden_size,vocab_size])
return y #shape:[batch_size,vocab_size]
# Source: a2_transformer_classification.py, project: text_classification (author: brightmart)
def loss(self, l2_lambda=0.0001): # 0.001
with tf.name_scope("loss"):
# input: `logits`:[batch_size, num_classes], and `labels`:[batch_size]
# output: A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the softmax cross entropy loss.
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label,logits=self.logits); # sigmoid_cross_entropy_with_logits.#losses=tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y,logits=self.logits)
# print("1.sparse_softmax_cross_entropy_with_logits.losses:",losses) # shape=(?,)
loss = tf.reduce_mean(losses) # print("2.loss.loss:", loss) #shape=()
l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if ('bias' not in v.name ) and ('alpha' not in v.name)]) * l2_lambda
loss = loss + l2_losses
return loss
#def loss_seq2seq(self):
# with tf.variable_scope("loss"):
# losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label, logits=self.logits);#losses:[batch_size,self.decoder_sent_length]
# loss_batch=tf.reduce_sum(losses,axis=1)/self.decoder_sent_length #loss_batch:[batch_size]
# loss=tf.reduce_mean(loss_batch)
# l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * self.l2_lambda
# loss = loss + l2_losses
# return loss
def SoftArgmin(outputLeft, outputRight, D=192):
left_result_D = outputLeft
right_result_D = outputRight
left_result_D_squeeze = tf.squeeze(left_result_D, axis=[0, 4])
right_result_D_squeeze = tf.squeeze(right_result_D, axis=[0, 4]) # 192 256 512
left_result_softmax = tf.nn.softmax(left_result_D_squeeze, dim=0)
right_result_softmax = tf.nn.softmax(right_result_D_squeeze, dim=0) # 192 256 512
d_grid = tf.cast(tf.range(D), tf.float32)
d_grid = tf.reshape(d_grid, (-1, 1, 1))
d_grid = tf.tile(d_grid, [1, 256, 512])
left_softargmin = tf.reduce_sum(tf.multiply(left_result_softmax, d_grid), axis=0, keep_dims=True)
right_softargmin = tf.reduce_sum(tf.multiply(right_result_softmax, d_grid), axis=0, keep_dims=True)
return left_softargmin, right_softargmin
def custom_loss(y_true, y_pred):
# Get prediction
pred_box_xy = tf.sigmoid(y_pred[..., :2])
pred_box_wh = y_pred[..., 2:4]
pred_box_conf = tf.sigmoid(y_pred[..., 4])
# Get ground truth
true_box_xy = y_true[..., :2]
true_box_wh = y_true[..., 2:4]
true_box_conf = y_true[..., 4]
# Determine the mask: simply the position of the ground truth boxes (the predictors)
true_mask = tf.expand_dims(y_true[..., 4], axis=-1)
# Calculate the loss. A scale can be associated with each loss term, indicating how important
# that term is: the bigger the scale, the more important the term.
loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * true_mask) * 1.0
loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * true_mask) * 1.0
loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf)) * 1.2
loss = loss_xy + loss_wh + loss_conf
return loss
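A shape-level sketch of calling this loss (assuming TensorFlow 1.x; the 4x4 grid and the 5 values per cell, i.e. x, y, w, h, confidence, are my own assumptions about the tensor layout):

y_true = tf.zeros([1, 4, 4, 5])          # no ground-truth boxes in this toy grid
y_pred = tf.random_normal([1, 4, 4, 5])  # raw network output
loss = custom_loss(y_true, y_pred)
with tf.Session() as sess:
    print(sess.run(loss))  # only the confidence term is non-zero when the mask is empty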
def build_loss(self, inp, output):
y_gt = inp['y_gt']
y_out = output['y_out']
ce = tfplus.nn.CE()({'y_gt': y_gt, 'y_out': y_out})
num_ex_f = tf.to_float(tf.shape(inp['x'])[0])
ce = tf.reduce_sum(ce) / num_ex_f
self.add_loss(ce)
total_loss = self.get_loss()
self.register_var('loss', total_loss)
ans = tf.argmax(y_gt, 1)
correct = tf.equal(ans, tf.argmax(y_out, 1))
top5_acc = tf.reduce_sum(tf.to_float(
tf.nn.in_top_k(y_out, ans, 5))) / num_ex_f
self.register_var('top5_acc', top5_acc)
acc = tf.reduce_sum(tf.to_float(correct)) / num_ex_f
self.register_var('acc', acc)
return total_loss
def build_loss_grad(self, inp, output):
y_gt = inp['y_gt']
y_out = output['y_out']
ce = tfplus.nn.CE()({'y_gt': y_gt, 'y_out': y_out})
num_ex_f = tf.to_float(tf.shape(inp['x'])[0])
ce = tf.reduce_sum(ce) / num_ex_f
self.add_loss(ce)
learn_rate = self.get_option('learn_rate')
total_loss = self.get_loss()
self.register_var('loss', total_loss)
eps = self.get_option('adam_eps')
optimizer = tf.train.AdamOptimizer(learn_rate, epsilon=eps)
global_step = tf.Variable(0.0)
self.register_var('step', global_step)
train_step = optimizer.minimize(
total_loss, global_step=global_step)
self.register_var('train_step', train_step)
correct = tf.equal(tf.argmax(y_gt, 1), tf.argmax(y_out, 1))
acc = tf.reduce_sum(tf.to_float(correct)) / num_ex_f
self.register_var('acc', acc)
pass
def _score(self, prev_decoder_state, prev_embedding):
# Returns scores in a tensor of shape [batch_size, input_sequence_length]
if self.mode == 'decode':
query_part = self.query_attention_partial_score_placeholder
encoder_part = self.encoder_state_attention_partial_scores_placeholder
else:
query_part = self.query_attention_partial_score
encoder_part = self.encoder_state_attention_partial_scores
embedding_part = tf.matmul(prev_embedding, self.attention_w_e)
output = tf.matmul(prev_decoder_state,
self.attention_w) + embedding_part + query_part + encoder_part + self.attention_b
output = tf.tanh(output)
output = tf.reduce_sum(self.attention_v * output, axis=2)
output = tf.transpose(output, [1, 0])
# Handle input document padding by giving a large penalty, eliminating it from the weighted average
padding_penalty = -1e20 * tf.to_float(1 - tf.sign(self.documents_placeholder))
masked = output + padding_penalty
return masked