def calculate_loss_distill_relabel(self, predictions, labels_distill, labels, **unused_params):
    with tf.name_scope("loss_distill_relabel"):
        print("loss_distill_relabel")
        epsilon = 10e-6
        float_labels = tf.cast(labels, tf.float32)
        sum_labels = tf.cast(tf.reduce_sum(float_labels), dtype=tf.int32)
        # Threshold at the k-th largest distillation score, where k is the
        # total number of positive ground-truth labels in the batch.
        pos_distill, _ = tf.nn.top_k(tf.reshape(labels_distill, [-1]), k=sum_labels)
        labels_true = tf.ones(tf.shape(labels))
        labels_false = tf.zeros(tf.shape(labels))
        labels_add = tf.where(tf.greater_equal(labels_distill, pos_distill[-1]), labels_true, labels_false)
        print(labels_add.get_shape().as_list())
        # Relabel: turn high-scoring distilled entries into additional positives.
        float_labels = float_labels + labels_add * (1.0 - float_labels)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
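
A minimal NumPy sketch of the relabel step above (hypothetical toy inputs), showing how the k-th largest distillation score becomes the threshold for adding new positives:

import numpy as np

labels = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)   # ground truth
labels_distill = np.array([[0.9, 0.85, 0.1], [0.2, 0.3, 0.6]])  # teacher scores
k = int(labels.sum())                            # k = 2 positive labels
threshold = np.sort(labels_distill.ravel())[-k]  # k-th largest teacher score = 0.85
labels_add = (labels_distill >= threshold).astype(np.float32)
relabeled = labels + labels_add * (1.0 - labels)
print(relabeled)  # [[1. 1. 0.] [0. 1. 0.]] -> one new positive at (0, 1)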
Python log() example source code
def calculate_loss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_xent"):
        epsilon = 10e-6
        vocab_size = predictions.get_shape().as_list()[1]
        float_labels = tf.cast(labels, tf.float32)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)
        # Hard-example mining: find the lowest-scoring positive and the
        # highest-scoring negative in each row.
        neg_labels = 1 - float_labels
        predictions_pos = predictions * float_labels + 10 * neg_labels
        predictions_minpos = tf.reduce_min(predictions_pos, axis=1, keep_dims=True)
        predictions_neg = predictions * neg_labels - 10 * float_labels
        predictions_maxneg = tf.reduce_max(predictions_neg, axis=1, keep_dims=True)
        # Up-weight (x11) negatives that outscore the weakest positive and
        # positives that score below the strongest negative.
        mask_1 = tf.cast(tf.greater_equal(predictions_neg, predictions_minpos), dtype=tf.float32)
        mask_2 = tf.cast(tf.less_equal(predictions_pos, predictions_maxneg), dtype=tf.float32)
        cross_entropy_loss = cross_entropy_loss * (mask_1 + mask_2) * 10 + cross_entropy_loss
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
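
A NumPy trace of the masking logic (toy values): negatives that outscore the weakest positive, and positives that score below the strongest negative, get their loss multiplied by 11.

import numpy as np

predictions = np.array([[0.9, 0.3, 0.6]])
labels = np.array([[1.0, 0.0, 1.0]])                 # classes 0 and 2 are positive
neg = 1 - labels
predictions_pos = predictions * labels + 10 * neg
predictions_neg = predictions * neg - 10 * labels
minpos = predictions_pos.min(axis=1, keepdims=True)  # 0.6, weakest positive
maxneg = predictions_neg.max(axis=1, keepdims=True)  # 0.3, strongest negative
mask_1 = (predictions_neg >= minpos).astype(float)   # hard negatives: none here
mask_2 = (predictions_pos <= maxneg).astype(float)   # hard positives: none here
print(mask_1, mask_2)  # [[0. 0. 0.]] [[0. 0. 0.]] -> this row is already well ranked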
def calculate_loss(self, predictions, labels, **unused_params):
    bound = FLAGS.softmax_bound
    vocab_size_1 = bound
    with tf.name_scope("loss_softmax"):
        epsilon = 10e-8
        float_labels = tf.cast(labels, tf.float32)
        # Head of the vocabulary: plain sigmoid cross entropy.
        labels_1 = float_labels[:, :vocab_size_1]
        predictions_1 = predictions[:, :vocab_size_1]
        cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1, labels_1)
        # Tail of the vocabulary: treated as a softmax over the remaining
        # classes plus one appended "none of the above" slot.
        labels_2 = float_labels[:, vocab_size_1:]
        predictions_2 = predictions[:, vocab_size_1:]
        # l1 normalization (labels are no less than 0)
        label_rowsum = tf.maximum(
            tf.reduce_sum(labels_2, 1, keep_dims=True),
            epsilon)
        label_append = 1.0 - tf.reduce_max(labels_2, 1, keep_dims=True)
        norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum), label_append), axis=1)
        predictions_append = 1.0 - tf.reduce_sum(predictions_2, 1, keep_dims=True)
        softmax_outputs = tf.concat((predictions_2, predictions_append), axis=1)
        softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
            1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
        softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
        return tf.reduce_mean(softmax_loss) + cross_entropy_loss
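
A NumPy sketch of the appended "remainder" slot used above (hypothetical values): the tail predictions get one extra entry so the whole vector sums to 1, and the tail labels are L1-normalized before the same slot is appended.

import numpy as np

predictions_2 = np.array([[0.2, 0.1]])        # tail-class probabilities
labels_2 = np.array([[1.0, 0.0]])
predictions_append = 1.0 - predictions_2.sum(1, keepdims=True)   # 0.7 left over
softmax_outputs = np.concatenate([predictions_2, predictions_append], 1)
label_append = 1.0 - labels_2.max(1, keepdims=True)              # 0 if any tail label is on
norm_labels = np.concatenate(
    [labels_2 / np.maximum(labels_2.sum(1, keepdims=True), 1e-8), label_append], 1)
print(softmax_outputs, norm_labels)  # [[0.2 0.1 0.7]] [[1. 0. 0.]]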
def calculate_loss(self, predictions, labels, weights=None, **unused_params):
    with tf.name_scope("loss_xent"):
        epsilon = 10e-6
        if FLAGS.label_smoothing:
            float_labels = smoothing(labels)
        else:
            float_labels = tf.cast(labels, tf.float32)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)
        if weights is not None:
            print(cross_entropy_loss, weights)
            # Scale the loss of example i by weights[i] across all classes.
            weighted_loss = tf.einsum("ij,i->ij", cross_entropy_loss, weights)
            print("create weighted_loss", weighted_loss)
            return tf.reduce_mean(tf.reduce_sum(weighted_loss, 1))
        else:
            return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
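
A quick NumPy check (hypothetical values) of what the einsum above does: "ij,i->ij" multiplies every class column of row i by the per-example weight weights[i].

import numpy as np

loss = np.array([[1.0, 2.0], [3.0, 4.0]])   # per-example, per-class losses
weights = np.array([0.5, 2.0])              # per-example weights
weighted = np.einsum("ij,i->ij", loss, weights)
print(weighted)  # [[0.5 1. ] [6.  8. ]]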
def sample_dtype(self):
    return tf.int32
# WRONG SECOND DERIVATIVES
# class CategoricalPd(Pd):
#     def __init__(self, logits):
#         self.logits = logits
#         self.ps = tf.nn.softmax(logits)
#     @classmethod
#     def fromflat(cls, flat):
#         return cls(flat)
#     def flatparam(self):
#         return self.logits
#     def mode(self):
#         return U.argmax(self.logits, axis=1)
#     def logp(self, x):
#         return -tf.nn.sparse_softmax_cross_entropy_with_logits(self.logits, x)
#     def kl(self, other):
#         return tf.nn.softmax_cross_entropy_with_logits(other.logits, self.ps) \
#             - tf.nn.softmax_cross_entropy_with_logits(self.logits, self.ps)
#     def entropy(self):
#         return tf.nn.softmax_cross_entropy_with_logits(self.logits, self.ps)
#     def sample(self):
#         u = tf.random_uniform(tf.shape(self.logits))
#         return U.argmax(self.logits - tf.log(-tf.log(u)), axis=1)
def __call__(self, z):
    # Negative log-density of an equal-weight mixture of 6 Gaussians whose
    # means lie on a circle of radius 5 around the origin.
    z1 = tf.reshape(tf.slice(z, [0, 0], [-1, 1]), [-1])
    z2 = tf.reshape(tf.slice(z, [0, 1], [-1, 1]), [-1])
    v1 = tf.sqrt((z1 - 5) * (z1 - 5) + z2 * z2) * 2
    v2 = tf.sqrt((z1 + 5) * (z1 + 5) + z2 * z2) * 2
    v3 = tf.sqrt((z1 - 2.5) * (z1 - 2.5) + (z2 - 2.5 * np.sqrt(3)) * (z2 - 2.5 * np.sqrt(3))) * 2
    v4 = tf.sqrt((z1 + 2.5) * (z1 + 2.5) + (z2 + 2.5 * np.sqrt(3)) * (z2 + 2.5 * np.sqrt(3))) * 2
    v5 = tf.sqrt((z1 - 2.5) * (z1 - 2.5) + (z2 + 2.5 * np.sqrt(3)) * (z2 + 2.5 * np.sqrt(3))) * 2
    v6 = tf.sqrt((z1 + 2.5) * (z1 + 2.5) + (z2 - 2.5 * np.sqrt(3)) * (z2 - 2.5 * np.sqrt(3))) * 2
    pdf1 = tf.exp(-0.5 * v1 * v1) / tf.sqrt(2 * np.pi * 0.25)
    pdf2 = tf.exp(-0.5 * v2 * v2) / tf.sqrt(2 * np.pi * 0.25)
    pdf3 = tf.exp(-0.5 * v3 * v3) / tf.sqrt(2 * np.pi * 0.25)
    pdf4 = tf.exp(-0.5 * v4 * v4) / tf.sqrt(2 * np.pi * 0.25)
    pdf5 = tf.exp(-0.5 * v5 * v5) / tf.sqrt(2 * np.pi * 0.25)
    pdf6 = tf.exp(-0.5 * v6 * v6) / tf.sqrt(2 * np.pi * 0.25)
    return -tf.log((pdf1 + pdf2 + pdf3 + pdf4 + pdf5 + pdf6) / 6)
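
For intuition, a NumPy mirror of the same energy (a sketch, not the original class), evaluated at one mode center and at the origin:

import numpy as np

def energy(z1, z2):
    centers = [(5, 0), (-5, 0), (2.5, 2.5 * np.sqrt(3)), (-2.5, -2.5 * np.sqrt(3)),
               (2.5, -2.5 * np.sqrt(3)), (-2.5, 2.5 * np.sqrt(3))]
    pdfs = [np.exp(-0.5 * (2 * np.hypot(z1 - cx, z2 - cy)) ** 2) / np.sqrt(2 * np.pi * 0.25)
            for cx, cy in centers]
    return -np.log(sum(pdfs) / 6)

print(energy(5.0, 0.0))  # ~2.0: low energy at a mode center
print(energy(0.0, 0.0))  # ~50: high energy between the modes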
def tf_truncexpon(batch_size, rate, right):
    '''
    A tensorflow node that returns a random variable
    sampled from an Exp(rate) distribution
    that has been truncated and normalized to [0, right].
    Leverages the fact that -log(U)/rate is exponential for uniform U
    (inverse-CDF sampling).
    batch_size: a tensorflow placeholder to sync batch_size everywhere
    rate: lambda rate parameter for the exponential distribution
    right: float in (0, inf), where to truncate the distribution
    '''
    uleft = tf.exp(-1 * rate * right)
    U = tf.random_uniform(shape=(batch_size, 1), minval=uleft, maxval=1)
    tExp = (-1 / rate) * tf.log(U)
    return tExp
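
A NumPy sketch of the same inverse-CDF trick, checking that all samples land in [0, right]:

import numpy as np

rate, right, n = 2.0, 1.5, 100000
uleft = np.exp(-rate * right)              # CDF mass beyond the truncation point
U = np.random.uniform(uleft, 1.0, size=n)  # uniform restricted to [e^(-rate*right), 1)
samples = -np.log(U) / rate                # inverse CDF of Exp(rate)
assert samples.min() >= 0.0 and samples.max() <= right
print(samples.mean())  # below 1/rate = 0.5, since the tail has been cut off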
def smoothing_cross_entropy(self, logits, labels, vocab_size, confidence=0.9):
    """Cross entropy with label smoothing to limit over-confidence.
    confidence = 1.0 - label_smoothing, where label_smoothing = 0.1.
    From http://github.com/tensorflow/tensor2tensor
    """
    with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]):
        # Low confidence is given to all non-true labels, uniformly.
        low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
        # Normalizing constant is the best cross-entropy value with soft targets.
        # We subtract it just for readability; it makes no difference to learning.
        normalizing = -(confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) *
                        low_confidence * tf.log(low_confidence + 1e-20))
        # Soft targets.
        soft_targets = tf.one_hot(
            tf.cast(labels, tf.int32),
            depth=vocab_size,
            on_value=confidence,
            off_value=low_confidence)
        xentropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=soft_targets)
        return xentropy - normalizing
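
A NumPy sketch of the smoothed target distribution for vocab_size = 4 and confidence = 0.9, plus the normalizing constant that gets subtracted:

import numpy as np

vocab_size, confidence = 4, 0.9
low = (1.0 - confidence) / (vocab_size - 1)  # 0.0333... per wrong class
soft = np.full(vocab_size, low)
soft[2] = confidence                         # true label = class 2
print(soft)                                  # [0.033 0.033 0.9 0.033], sums to 1
normalizing = -(confidence * np.log(confidence) +
                (vocab_size - 1) * low * np.log(low + 1e-20))
print(normalizing)                           # ~0.435, the entropy of the soft target itself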
def binary_cross_entropy(preds, targets, name=None):
    """Computes binary cross entropy given `preds`.
    For brevity, let `x = preds`, `z = targets`. The logistic loss is
        loss(x, z) = - sum_i (z[i] * log(x[i]) + (1 - z[i]) * log(1 - x[i]))
    Args:
        preds: A `Tensor` of type `float32` or `float64`.
        targets: A `Tensor` of the same type and shape as `preds`.
    """
    eps = 1e-12
    # `ops` here is `from tensorflow.python.framework import ops`.
    with ops.op_scope([preds, targets], name, "bce_loss") as name:
        preds = ops.convert_to_tensor(preds, name="preds")
        targets = ops.convert_to_tensor(targets, name="targets")
        return tf.reduce_mean(-(targets * tf.log(preds + eps) +
                                (1. - targets) * tf.log(1. - preds + eps)))
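
The same computation in plain NumPy (a sanity check, not the library routine):

import numpy as np

preds = np.array([0.9, 0.1, 0.8])
targets = np.array([1.0, 0.0, 1.0])
eps = 1e-12
bce = np.mean(-(targets * np.log(preds + eps) +
                (1.0 - targets) * np.log(1.0 - preds + eps)))
print(bce)  # ~0.145: low, since the predictions match the targets well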
def _bbox_transform(self, ex_rois, gt_rois):
    # Box corners -> widths/heights/centers (+1.0 for inclusive pixel coords).
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
    # Standard R-CNN regression targets: center offsets scaled by the anchor
    # size, log-ratios for width and height.
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = tf.log(gt_widths / ex_widths)
    targets_dh = tf.log(gt_heights / ex_heights)
    targets = tf.transpose(tf.pack(  # tf.pack is the pre-1.0 name of tf.stack
        (targets_dx, targets_dy, targets_dw, targets_dh),
        axis=0
    ))
    return targets
def indices_to_load_by_target_index(allowed_characters_for_loaded_model: List[chr],
                                    allowed_characters: List[chr]) -> List[Optional[int]]:
    load_character_set = set(allowed_characters_for_loaded_model)
    target_character_set = set(allowed_characters)
    ignored = load_character_set - target_character_set
    if ignored:
        log("Ignoring characters {} from loaded model.".format(sorted(ignored)))
    extra = target_character_set - load_character_set
    if extra:
        log("Initializing extra characters {} not found in model.".format(sorted(extra)))

    def character_index_to_load(target_character: chr) -> Optional[int]:
        return single_or_none([index for index, character in enumerate(allowed_characters_for_loaded_model)
                               if character == target_character])

    character_mapping = [character_index_to_load(character) for character in allowed_characters]
    log("Character mapping: {}".format(character_mapping))
    return character_mapping
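
For example (hypothetical alphabets; assumes the project's `log` and `single_or_none` helpers are in scope), characters shared with the loaded model map to their old indices and new characters map to None:

# Loaded model alphabet: "abc"; target alphabet: "abd".
# 'a' -> 0, 'b' -> 1, 'd' -> None (freshly initialized); 'c' is ignored.
print(indices_to_load_by_target_index(list("abc"), list("abd")))  # [0, 1, None]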
def _decode_lambda(self, args):
    """
    Decoding within the tensorflow graph.
    In case kenlm_directory is specified, a modified version of tensorflow
    (available at https://github.com/timediv/tensorflow-with-kenlm)
    is needed to run it, which extends ctc_decode to use a kenlm decoder.
    :return:
        Most probable decoded sequence. Important: blank labels are returned as `-1`.
    """
    import tensorflow as tf

    prediction_batch, prediction_lengths = args
    # CTC expects time-major log probabilities: [max_time, batch, num_classes].
    log_prediction_batch = tf.log(tf.transpose(prediction_batch, perm=[1, 0, 2]) + 1e-8)
    prediction_length_batch = tf.to_int32(tf.squeeze(prediction_lengths, axis=[1]))
    (decoded, log_prob) = self.ctc_get_decoded_and_log_probability_batch(log_prediction_batch,
                                                                         prediction_length_batch)
    return single([tf.sparse_to_dense(st.indices, st.dense_shape, st.values, default_value=-1)
                   for st in decoded])
def train(self,
          labeled_spectrogram_batches: Iterable[List[LabeledSpectrogram]],
          preview_labeled_spectrogram_batch: List[LabeledSpectrogram],
          tensor_board_log_directory: Path,
          net_directory: Path,
          batches_per_epoch: int):
    print_preview_batch = lambda: log(self.test_and_predict_batch(preview_labeled_spectrogram_batch))

    print_preview_batch()
    self.loss_net.fit_generator(self._loss_inputs_generator(labeled_spectrogram_batches), epochs=100000000,
                                steps_per_epoch=batches_per_epoch,
                                callbacks=self.create_callbacks(
                                    callback=print_preview_batch,
                                    tensor_board_log_directory=tensor_board_log_directory,
                                    net_directory=net_directory),
                                initial_epoch=self.load_epoch if (self.load_epoch is not None) else 0)
def tabular_kl(p, q, zero_prob_value=0., logarg_clip=None):
    """Computes KL-divergence KL(p||q) for two probability mass functions (pmf) given in tabular form.
    :param p: iterable
    :param q: iterable
    :param zero_prob_value: float; values below this threshold are treated as zero
    :param logarg_clip: float or None; clips the argument to the log to lie in [1/logarg_clip, logarg_clip] if not None
    :return: iterable of the broadcast shape of (p * q), per-coordinate value of KL(p||q)
    """
    p, q = (tf.cast(i, tf.float64) for i in (p, q))
    non_zero = tf.greater(p, zero_prob_value)
    logarg = p / q
    if logarg_clip is not None:
        logarg = clip_preserve(logarg, 1. / logarg_clip, logarg_clip)
    # Apply log only where p > 0; the p = 0 coordinates contribute nothing to the KL.
    log = masked_apply(logarg, tf.log, non_zero)
    kl = p * log
    return tf.cast(kl, tf.float32)
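
A NumPy equivalent for two small pmfs (a sketch; clip_preserve and masked_apply are helpers from the original project):

import numpy as np

p = np.array([0.5, 0.5, 0.0])
q = np.array([0.25, 0.7, 0.05])
mask = p > 0.0                          # p = 0 coordinates contribute 0
kl_terms = np.where(mask, p * np.log(np.where(mask, p / q, 1.0)), 0.0)
print(kl_terms, kl_terms.sum())         # KL(p||q) = 0.5*log(2) + 0.5*log(5/7) ~= 0.178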
def kl_normal(mu0, var0,
              mu1=0.0, var1=1.0):
    """KL divergence between two diagonal normal distributions.
    Note that this is a simple version: instead of a full covariance matrix Σ,
    var is the vector of the elements on Σ's main diagonal (diag(Σ)).
    :param mu0: μ0.
    :param var0: diag(Σ0).
    :param mu1: μ1.
    :param var1: diag(Σ1).
    :return: The KL divergence.
    """
    e = 1e-4
    var0 += e
    if mu1 == 0.0 and var1 == 1.0:
        # KL(N(μ0, Σ0) || N(0, I)) = 0.5 * Σ_i (σ0_i + μ0_i² - 1 - log σ0_i)
        kl = var0 + mu0 ** 2 - 1 - tf.log(var0)
    else:
        kl = var0 / var1 + (mu0 - mu1) ** 2 / var1 - 1 - tf.log(var0 / var1)
    kl = 0.5 * tf.reduce_sum(kl, 1)
    return kl
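
A NumPy check of the standard-normal special case against the closed form (hypothetical values):

import numpy as np

mu0 = np.array([[0.5, -1.0]])
var0 = np.array([[2.0, 0.5]])
kl = 0.5 * np.sum(var0 + mu0 ** 2 - 1 - np.log(var0), axis=1)
print(kl)  # [0.875]; the KL is 0 only when mu0 = 0 and var0 = 1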
def __init__(self, lr, s_size, a_size):
    # One-hot encode the (integer) state and feed it through a single
    # sigmoid layer whose weights start at 1, i.e. uniform action preferences.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    state_in_OH = slim.one_hot_encoding(self.state_in, s_size)
    output = slim.fully_connected(state_in_OH,
                                  a_size,
                                  biases_initializer=None,
                                  activation_fn=tf.nn.sigmoid,
                                  weights_initializer=tf.ones_initializer())
    self.output = tf.reshape(output, [-1])
    self.chosen_action = tf.argmax(self.output, 0)

    # Policy-gradient update: increase the log-weight of the taken action
    # in proportion to the observed reward.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = optimizer.minimize(self.loss)
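
A minimal usage sketch (assuming the enclosing class is named `agent` and that a `get_reward(state, action)` environment function exists; both names are placeholders):

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
myAgent = agent(lr=0.001, s_size=3, a_size=4)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    state = np.random.randint(0, 3)
    action = sess.run(myAgent.chosen_action, feed_dict={myAgent.state_in: [state]})
    reward = get_reward(state, action)   # environment feedback, assumed
    sess.run(myAgent.update, feed_dict={myAgent.state_in: [state],
                                        myAgent.action_holder: [action],
                                        myAgent.reward_holder: [reward]})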
def softmax_loss(self, antecedent_scores, antecedent_labels):
    """
    Computes the value of the loss function using antecedent_scores and antecedent_labels.
    Essentially a standard softmax cross-entropy, marginalized over all gold antecedents.
    Args:
        antecedent_scores: tf.float64, [num_mentions, max_ant + 1], output of the fully-connected
            network that computes antecedent scores.
        antecedent_labels: True labels for the antecedents.
    Returns: [num_mentions]
        The value of the loss function.
    """
    # log(0) = -inf for non-gold antecedents, so they drop out of the logsumexp.
    gold_scores = antecedent_scores + tf.log(tf.cast(antecedent_labels, tf.float64))  # [num_mentions, max_ant + 1]
    marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1])  # [num_mentions]
    log_norm = tf.reduce_logsumexp(antecedent_scores, [1])  # [num_mentions]
    return log_norm - marginalized_gold_scores  # [num_mentions]
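
A NumPy illustration of the marginalization (toy scores; the labels mark gold antecedents):

import numpy as np

scores = np.array([[2.0, 0.5, 1.0]])
labels = np.array([[True, False, True]])         # two gold antecedents

def logsumexp(x, axis):
    m = x.max(axis=axis, keepdims=True)
    return (m + np.log(np.exp(x - m).sum(axis=axis, keepdims=True))).squeeze(axis)

with np.errstate(divide="ignore"):               # log(0) -> -inf is intended here
    gold_scores = scores + np.log(labels.astype(np.float64))
loss = logsumexp(scores, 1) - logsumexp(gold_scores, 1)
print(loss)  # [0.151...]: small, since most probability mass is on gold antecedents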
From policy_gradient_actor_critic.py — project: RFR-solution, author: baoblackcoal.
def policy_gradient():
    with tf.variable_scope("policy"):
        params = tf.get_variable("policy_parameters", [4, 2])
        state = tf.placeholder("float", [None, 4])
        actions = tf.placeholder("float", [None, 2])
        advantages = tf.placeholder("float", [None, 1])
        reward_input = tf.placeholder("float")
        episode_reward = tf.get_variable("episode_reward", initializer=tf.constant(0.))
        episode_reward = reward_input
        linear = tf.matmul(state, params)
        probabilities = tf.nn.softmax(linear)
        # Probability of the action actually taken (actions are one-hot).
        good_probabilities = tf.reduce_sum(tf.mul(probabilities, actions), reduction_indices=[1])
        # REINFORCE: log-probability of the taken action, weighted by its advantage.
        eligibility = tf.log(good_probabilities) * advantages
        loss = -tf.reduce_sum(eligibility)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)  # `learning_rate` is module-level
        tf.scalar_summary("loss", loss)
        tf.scalar_summary("episode_reward", episode_reward)
        return probabilities, state, actions, advantages, optimizer, reward_input, episode_reward
def logistic_loss(positive_scores, negative_scores):
    """
    Pairwise logistic loss [1]:
        loss(p, n) = \sum_i log(1 + e^(1 - p_i + n_i))
    [1] http://yann.lecun.com/exdb/publis/pdf/lecun-06.pdf
    Args:
        positive_scores: (N,) Tensor containing scores of positive examples.
        negative_scores: (N,) Tensor containing scores of negative examples.
    Returns:
        Loss value.
    """
    logistic_losses = tf.log(1 + tf.exp(1 - positive_scores + negative_scores))
    loss = tf.reduce_sum(logistic_losses)
    return loss
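
A behavior check in NumPy: the per-pair loss vanishes once a positive outscores its negative by well over the margin of 1, and grows roughly linearly when the ranking is inverted.

import numpy as np

pos = np.array([5.0, 1.0, 0.0])
neg = np.array([0.0, 1.0, 5.0])
print(np.log(1 + np.exp(1 - pos + neg)))  # [0.018 1.313 6.002]: ranked, tied, inverted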
def __call__(self, u_t, a, b, scope=None):
    """
    :param u_t: [N, M, d]
    :param a: [N, M, 1]
    :param b: [N, M, 1]
    :param mask: [N, M]
    :return:
    """
    N, M, d = self.batch_size, self.mem_size, self.hidden_size
    L, sL = self.L, self.sL
    with tf.name_scope(scope or self.__class__.__name__):
        L = tf.tile(tf.expand_dims(L, 0), [N, 1, 1])
        sL = tf.tile(tf.expand_dims(sL, 0), [N, 1, 1])
        logb = tf.log(b + 1e-9)
        # Replace the first position's log b with zeros (i.e. b = 1 there).
        logb = tf.concat(1, [tf.zeros([N, 1, 1]), tf.slice(logb, [0, 1, 0], [-1, -1, -1])])
        left = L * tf.exp(tf.batch_matmul(L, logb * sL))  # [N, M, M]; batch_matmul is the pre-1.0 tf.matmul
        right = a * u_t  # [N, M, d]
        u = tf.batch_matmul(left, right)  # [N, M, d]
        return u
def __call__(self, u_t, a, b, scope=None):
    """
    :param u_t: [N, M, d]
    :param a: [N, M, d]
    :param b: [N, M, d]
    :param mask: [N, M]
    :return:
    """
    N, M, d = self.batch_size, self.mem_size, self.hidden_size
    L, sL = self.L, self.sL
    with tf.name_scope(scope or self.__class__.__name__):
        L = tf.tile(tf.expand_dims(tf.expand_dims(L, 0), 0), [N, d, 1, 1])
        sL = tf.tile(tf.expand_dims(tf.expand_dims(sL, 0), 0), [N, d, 1, 1])
        logb = tf.log(b + 1e-9)  # [N, M, d]
        # Replace the first position's log b with zeros (i.e. b = 1 there).
        logb = tf.concat(1, [tf.zeros([N, 1, d]), tf.slice(logb, [0, 1, 0], [-1, -1, -1])])  # [N, M, d]
        logb = tf.expand_dims(tf.transpose(logb, [0, 2, 1]), -1)  # [N, d, M, 1]
        left = L * tf.exp(tf.batch_matmul(L, logb * sL))  # [N, d, M, M]
        right = a * u_t  # [N, M, d]
        right = tf.expand_dims(tf.transpose(right, [0, 2, 1]), -1)  # [N, d, M, 1]
        u = tf.batch_matmul(left, right)  # [N, d, M, 1]
        u = tf.transpose(tf.squeeze(u, [3]), [0, 2, 1])  # [N, M, d]
        return u
def categorical_crossentropy(output, target, from_logits=False):
    '''Categorical crossentropy between an output tensor
    and a target tensor, where the target is a tensor of the same
    shape as the output.
    '''
    # Note: tf.nn.softmax_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # scale preds so that the class probas of each sample sum to 1
        output /= tf.reduce_sum(output,
                                reduction_indices=len(output.get_shape()) - 1,
                                keep_dims=True)
        # manual computation of crossentropy
        epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
        output = tf.clip_by_value(output, epsilon, 1. - epsilon)
        return - tf.reduce_sum(target * tf.log(output),
                               reduction_indices=len(output.get_shape()) - 1)
    else:
        return tf.nn.softmax_cross_entropy_with_logits(output, target)
def sparse_categorical_crossentropy(output, target, from_logits=False):
    '''Categorical crossentropy between an output tensor
    and a target tensor, where the target is an integer tensor.
    '''
    # Note: tf.nn.softmax_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
        output = tf.clip_by_value(output, epsilon, 1 - epsilon)
        output = tf.log(output)
    output_shape = output.get_shape()
    res = tf.nn.sparse_softmax_cross_entropy_with_logits(
        tf.reshape(output, [-1, int(output_shape[-1])]),
        cast(flatten(target), 'int64'))
    if len(output_shape) == 3:
        # if our output includes timesteps we need to reshape
        return tf.reshape(res, [-1, int(output_shape[-2])])
    else:
        return res
def encode_bboxes_tf(proposals, gt, config):
    """Encode bbox coordinates in the format
    used for computing the loss."""
    prop_x = proposals[..., 0]
    prop_y = proposals[..., 1]
    prop_w = proposals[..., 2]
    prop_h = proposals[..., 3]
    gt_x = gt[..., 0]
    gt_y = gt[..., 1]
    gt_w = gt[..., 2]
    gt_h = gt[..., 3]
    # Center offsets normalized by the proposal size; log-ratios for the size.
    diff_x = (gt_x + 0.5 * gt_w - prop_x - 0.5 * prop_w) / prop_w
    diff_y = (gt_y + 0.5 * gt_h - prop_y - 0.5 * prop_h) / prop_h
    diff_w = tf.log(gt_w / prop_w)
    diff_h = tf.log(gt_h / prop_h)
    var_x, var_y, var_w, var_h = config['prior_variance']
    x = tf.stack([diff_x / var_x, diff_y / var_y, diff_w / var_w, diff_h / var_h], -1)
    return x
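
For reference, the inverse operation (a hedged sketch, not part of the original file): undo the variance scaling, then recover the ground-truth box from the proposal.

import tensorflow as tf

def decode_bboxes_tf(proposals, deltas, config):
    # Inverse of encode_bboxes_tf above, under the same [x, y, w, h] layout.
    var_x, var_y, var_w, var_h = config['prior_variance']
    prop_x, prop_y = proposals[..., 0], proposals[..., 1]
    prop_w, prop_h = proposals[..., 2], proposals[..., 3]
    ctr_x = prop_x + 0.5 * prop_w + deltas[..., 0] * var_x * prop_w
    ctr_y = prop_y + 0.5 * prop_h + deltas[..., 1] * var_y * prop_h
    w = prop_w * tf.exp(deltas[..., 2] * var_w)
    h = prop_h * tf.exp(deltas[..., 3] * var_h)
    return tf.stack([ctr_x - 0.5 * w, ctr_y - 0.5 * h, w, h], -1)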