def bboxes_filter_overlap(labels, bboxes,
                          threshold=0.5, assign_negative=False,
                          scope=None):
    """Filter out bounding boxes based on (relative) overlap with the reference
    box [0, 0, 1, 1]. Either removes the bounding boxes entirely, or assigns
    negative labels to those outside the reference box (useful for later processing).
    Return:
      labels, bboxes: Filtered (or newly assigned) elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        scores = bboxes_intersection(tf.constant([0, 0, 1, 1], bboxes.dtype),
                                     bboxes)
        mask = scores > threshold
        if assign_negative:
            labels = tf.where(mask, labels, -labels)
            # bboxes = tf.where(mask, bboxes, bboxes)
        else:
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes
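The helper `bboxes_intersection` is not shown in this snippet. In SSD-style pipelines it typically returns, for each box, the fraction of that box's own area lying inside the reference box. Below is a minimal sketch of such a helper, assuming a `[ymin, xmin, ymax, xmax]` box layout and TensorFlow 1.x; it is an assumed reimplementation, not the original.

import tensorflow as tf

def bboxes_intersection(bbox_ref, bboxes, name=None):
    # Assumed helper: fraction of each bbox's own area that falls inside bbox_ref.
    with tf.name_scope(name, 'bboxes_intersection'):
        int_ymin = tf.maximum(bboxes[:, 0], bbox_ref[0])
        int_xmin = tf.maximum(bboxes[:, 1], bbox_ref[1])
        int_ymax = tf.minimum(bboxes[:, 2], bbox_ref[2])
        int_xmax = tf.minimum(bboxes[:, 3], bbox_ref[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        inter_vol = h * w
        bboxes_vol = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
        # Guard against zero-area boxes: return 0 overlap instead of NaN.
        return tf.where(tf.greater(bboxes_vol, 0.),
                        tf.divide(inter_vol, bboxes_vol),
                        tf.zeros_like(inter_vol))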
def _filter_negative_samples(labels, tensors):
    """Keeps only samples with non-negative labels.
    Params:
    -----
    labels: of shape (N,)
    tensors: a list of tensors, each of shape (N, .., ..); the first axis is the sample number
    Returns:
    -----
    tensors: filtered tensors
    """
    # return tensors
    keeps = tf.where(tf.greater_equal(labels, 0))
    keeps = tf.reshape(keeps, [-1])

    filtered = []
    for t in tensors:
        tf.assert_equal(tf.shape(t)[0], tf.shape(labels)[0])
        f = tf.gather(t, keeps)
        filtered.append(f)
    return filtered
def _symmetric_matrix_square_root(mat, eps=1e-10):
    """Compute square root of a symmetric matrix.
    Note that this is different from an elementwise square root. We want to
    compute M' where M' = sqrt(mat) such that M' * M' = mat.
    Also note that this method **only** works for symmetric matrices.
    Args:
      mat: Matrix to take the square root of.
      eps: Small epsilon such that any element less than eps will not be square
        rooted to guard against numerical instability.
    Returns:
      Matrix square root of mat.
    """
    # Unlike numpy, tensorflow's return order is (s, u, v)
    s, u, v = tf.svd(mat)
    # sqrt is unstable around 0, just use 0 in such case
    si = tf.where(tf.less(s, eps), s, tf.sqrt(s))
    # Note that the v returned by Tensorflow is v = V
    # (when referencing the equation A = U S V^T)
    # This is unlike Numpy which returns v = V^T
    return tf.matmul(
        tf.matmul(u, tf.diag(si)), v, transpose_b=True)
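A small sanity-check sketch (assuming TensorFlow 1.x, where `tf.svd` and `tf.diag` exist): build a symmetric positive semi-definite matrix and verify that the returned root squares back to it.

import numpy as np
import tensorflow as tf

b = np.random.randn(4, 4).astype(np.float32)
mat = tf.constant(b.dot(b.T))          # symmetric positive semi-definite input
root = _symmetric_matrix_square_root(mat)
recon = tf.matmul(root, root)          # should recover `mat` up to float error
with tf.Session() as sess:
    m, r = sess.run([mat, recon])
    print(np.allclose(m, r, atol=1e-3))    # expected: True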
def masked_apply(tensor, op, mask):
    """Applies `op` to tensor only at locations indicated by `mask` and sets the rest to zero.
    Similar to doing `tensor = tf.where(mask, op(tensor), tf.zeros_like(tensor))` but it behaves
    correctly when `op(tensor)` is NaN or inf while tf.where does not.
    :param tensor: tf.Tensor
    :param op: tf.Op
    :param mask: tf.Tensor with dtype == bool
    :return: tf.Tensor
    """
    chosen = tf.boolean_mask(tensor, mask)
    applied = op(chosen)
    idx = tf.to_int32(tf.where(mask))
    result = tf.scatter_nd(idx, applied, tf.shape(tensor))
    return result
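A minimal usage sketch (assuming TensorFlow 1.x): apply `tf.log` only where the input is positive, so the zero entries never produce NaN or inf.

import tensorflow as tf

x = tf.constant([[0.0, 1.0], [2.0, 0.0]])
mask = tf.greater(x, 0.0)                  # apply the op only where x > 0
safe_log = masked_apply(x, tf.log, mask)   # zeros elsewhere, no NaN/inf leaking in
with tf.Session() as sess:
    print(sess.run(safe_log))              # [[0., 0.], [0.6931, 0.]]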
def value_transition(self, curr_state, next_symbols, batch_size):
    first_value_token = self.num_functions + self.num_begin_tokens + self.num_control_tokens
    num_value_tokens = self.output_size - first_value_token
    with tf.name_scope('grammar_transition'):
        adjusted_next_symbols = tf.where(next_symbols >= self.num_control_tokens,
                                         next_symbols + (first_value_token - self.num_control_tokens),
                                         next_symbols)

        assert1 = tf.Assert(tf.reduce_all(tf.logical_and(next_symbols < num_value_tokens, next_symbols >= 0)),
                            [curr_state, next_symbols])
        with tf.control_dependencies([assert1]):
            transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
        assert transitions.get_shape()[1:] == (self.output_size,)

        indices = tf.stack((tf.range(0, batch_size), adjusted_next_symbols), axis=1)
        next_state = tf.gather_nd(transitions, indices)

        assert2 = tf.Assert(tf.reduce_all(next_state >= 0), [curr_state, adjusted_next_symbols, next_state])
        with tf.control_dependencies([assert2]):
            return tf.identity(next_state)
def calculate_loss_distill_relabel(self, predictions, labels_distill, labels, **unused_params):
    with tf.name_scope("loss_distill_relabel"):
        print("loss_distill_relabel")
        epsilon = 10e-6
        float_labels = tf.cast(labels, tf.float32)
        sum_labels = tf.cast(tf.reduce_sum(float_labels), dtype=tf.int32)
        pos_distill, _ = tf.nn.top_k(tf.reshape(labels_distill, [-1]), k=sum_labels)
        labels_true = tf.ones(tf.shape(labels))
        labels_false = tf.zeros(tf.shape(labels))
        labels_add = tf.where(tf.greater_equal(labels_distill, pos_distill[-1]), labels_true, labels_false)
        print(labels_add.get_shape().as_list())
        float_labels = float_labels + labels_add * (1.0 - float_labels)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def huber_loss(x, delta=1.0):
    """Reference: https://en.wikipedia.org/wiki/Huber_loss"""
    return tf.where(
        tf.abs(x) < delta,
        tf.square(x) * 0.5,
        delta * (tf.abs(x) - 0.5 * delta)
    )
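A quick numerical sketch (assuming TensorFlow 1.x): with the default `delta=1.0`, the quadratic branch is used for |x| < 1 and the linear branch elsewhere; the two branches agree at |x| = 1.

import tensorflow as tf

x = tf.constant([-2.0, -1.0, 0.5, 3.0])
with tf.Session() as sess:
    print(sess.run(huber_loss(x)))   # [1.5, 0.5, 0.125, 2.5]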
# ================================================================
# Basic Stuff
# ================================================================
# ================================================================
# Theano-like Function
# ================================================================
# ================================================================
# Optimizer utils
# ================================================================
def _generate_labels(self, overlaps):
    labels = tf.Variable(tf.ones(shape=(tf.shape(overlaps)[0],), dtype=tf.float32) * -1, trainable=False,
                         validate_shape=False)
    gt_max_overlaps = tf.arg_max(overlaps, dimension=0)
    anchor_max_overlaps = tf.arg_max(overlaps, dimension=1)
    mask = tf.one_hot(anchor_max_overlaps, tf.shape(overlaps)[1], on_value=True, off_value=False)
    max_overlaps = tf.boolean_mask(overlaps, mask)
    if self._debug:
        max_overlaps = tf.Print(max_overlaps, [max_overlaps])
    labels = tf.scatter_update(labels, gt_max_overlaps, tf.ones((tf.shape(gt_max_overlaps)[0],)))
    # TODO: extract config object
    over_threshold_mask = tf.reshape(tf.where(max_overlaps > 0.5), (-1,))
    if self._debug:
        over_threshold_mask = tf.Print(over_threshold_mask, [over_threshold_mask], message='over threshold index : ')
    labels = tf.scatter_update(labels, over_threshold_mask, tf.ones((tf.shape(over_threshold_mask)[0],)))
    # TODO: support clobber positive as in the original implementation
    below_threshold_mask = tf.reshape(tf.where(max_overlaps < 0.3), (-1,))
    if self._debug:
        below_threshold_mask = tf.Print(below_threshold_mask, [below_threshold_mask], message='below threshold index : ')
    labels = tf.scatter_update(labels, below_threshold_mask, tf.zeros((tf.shape(below_threshold_mask)[0],)))
    return labels
def __init__(self, actions):
    self.replayMemory = deque()
    self.timeStep = 0
    self.epsilon = INITIAL_EPSILON
    self.actions = actions
    self.files = 0
    self.currentQNet = QNet(len(actions))
    self.targetQNet = QNet(len(actions))

    self.actionInput = tf.placeholder("float", [None, len(actions)], name="actions_one_hot")
    self.yInput = tf.placeholder("float", [None], name="y")

    self.action_mask = tf.multiply(self.currentQNet.QValue, self.actionInput)
    self.Q_action = tf.reduce_sum(self.action_mask, reduction_indices=1)
    self.delta = delta = tf.subtract(self.Q_action, self.yInput)
    self.loss = tf.where(tf.abs(delta) < 1.0, 0.5 * tf.square(delta), tf.abs(delta) - 0.5)
    # self.loss = tf.square(tf.subtract(self.Q_action, self.yInput))
    self.cost = tf.reduce_mean(self.loss)
    self.trainStep = tf.train.RMSPropOptimizer(learning_rate=RMS_LEARNING_RATE, momentum=RMS_MOMENTUM,
                                               epsilon=RMS_EPSILON, decay=RMS_DECAY).minimize(self.cost)
#
def get_acceptance_rate(q, p, new_q, new_p, log_posterior, mass, data_axes):
    old_hamiltonian, old_log_prob = hamiltonian(
        q, p, log_posterior, mass, data_axes)
    new_hamiltonian, new_log_prob = hamiltonian(
        new_q, new_p, log_posterior, mass, data_axes)
    old_log_prob = tf.check_numerics(
        old_log_prob,
        'HMC: old_log_prob has numeric errors! Try better initialization.')
    acceptance_rate = tf.exp(
        tf.minimum(-new_hamiltonian + old_hamiltonian, 0.0))
    is_finite = tf.logical_and(tf.is_finite(acceptance_rate),
                               tf.is_finite(new_log_prob))
    acceptance_rate = tf.where(is_finite, acceptance_rate,
                               tf.zeros_like(acceptance_rate))
    return old_hamiltonian, new_hamiltonian, old_log_prob, new_log_prob, \
        acceptance_rate
def sgvb(self):
    """
    Implements the stochastic gradient variational bayes (SGVB) gradient
    estimator for the objective, also known as "reparameterization trick"
    or "path derivative estimator". It was first used for importance
    weighted objectives in (Burda, 2015), where it's named "IWAE".
    It only works for latent `StochasticTensor` s that can be
    reparameterized (Kingma, 2013). For example,
    :class:`~zhusuan.model.stochastic.Normal`
    and :class:`~zhusuan.model.stochastic.Concrete`.

    .. note::
        To use the :meth:`sgvb` estimator, the ``is_reparameterized``
        property of each latent `StochasticTensor` must be True (which is
        the default setting when they are constructed).

    :return: A Tensor. The surrogate cost for Tensorflow optimizers to
        minimize.
    """
    return -self.tensor
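A minimal sketch of the reparameterization trick the docstring refers to, written in plain TensorFlow 1.x and independent of zhusuan's `StochasticTensor` (variable names here are illustrative): a Normal sample is rewritten as `mu + sigma * eps` with `eps ~ N(0, 1)`, so gradients of any objective of the sample flow back to `mu` and `sigma`.

import tensorflow as tf

mu = tf.Variable(0.5)
log_sigma = tf.Variable(-1.0)
eps = tf.random_normal([128])               # noise independent of the parameters
z = mu + tf.exp(log_sigma) * eps            # reparameterized sample of N(mu, sigma^2)
surrogate = tf.reduce_mean(tf.square(z))    # any differentiable objective of z
grads = tf.gradients(surrogate, [mu, log_sigma])   # well-defined pathwise gradients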
def selu(x):
    """ SELU.
    Scaled Exponential Linear Unit.
    Arguments
        x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
            `int16`, or `int8`
    References:
        Self-Normalizing Neural Networks, Klambauer et al., 2017.
    Links:
        [https://arxiv.org/abs/1706.02515](https://arxiv.org/abs/1706.02515)
    """
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
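A quick check sketch (assuming TensorFlow 1.x): `selu(0) == 0`, positive inputs are scaled by roughly 1.0507, and negative inputs saturate toward `-scale * alpha`.

import tensorflow as tf

with tf.Session() as sess:
    print(sess.run(selu(tf.constant([-1.0, 0.0, 1.0]))))
    # approximately [-1.1113, 0.0, 1.0507]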
def _body(self, x, cumul_out, prev_state, cumul_state,
          cumul_halting, iteration, remainder, halting_linear, x_ones):
    """The `body` of `tf.while_loop`."""
    # Increase iteration count only for those elements that are still running.
    all_ones = tf.constant(1, shape=(self._batch_size, 1), dtype=self._dtype)
    is_iteration_over = tf.equal(cumul_halting, all_ones)
    next_iteration = tf.where(is_iteration_over, iteration, iteration + 1)
    out, next_state = self._core(x, prev_state)
    # Get part of state used to compute halting values.
    halting_input = halting_linear(self._get_state_for_halting(next_state))
    halting = tf.sigmoid(halting_input, name="halting")
    next_cumul_halting_raw = cumul_halting + halting
    over_threshold = next_cumul_halting_raw > self._threshold
    next_cumul_halting = tf.where(over_threshold, all_ones,
                                  next_cumul_halting_raw)
    next_remainder = tf.where(over_threshold, remainder,
                              1 - next_cumul_halting_raw)
    p = next_cumul_halting - cumul_halting
    next_cumul_state = _nested_add(cumul_state,
                                   _nested_unary_mul(next_state, p))
    next_cumul_out = cumul_out + p * out
    return (x_ones, next_cumul_out, next_state, next_cumul_state,
            next_cumul_halting, next_iteration, next_remainder)
def stochastical_binarize_gradients(grads_and_vars, scalers):
    """Stochastically binarize gradients."""
    gradients, variables = zip(*grads_and_vars)
    binarized_gradients = []
    for gradient, scaler in zip(gradients, scalers):
        if gradient is None:
            binarized_gradients.append(None)
            continue
        if isinstance(gradient, tf.IndexedSlices):
            gradient_shape = gradient.dense_shape
        else:
            gradient_shape = gradient.get_shape()
        zeros = tf.zeros(gradient_shape)
        abs_gradient = tf.abs(gradient)
        sign_gradient = tf.sign(gradient)
        rnd_sample = tf.random_uniform(gradient_shape, 0, scaler)
        where_cond = tf.less(rnd_sample, abs_gradient)
        binarized_gradient = tf.cond(tf.size(gradient) < FLAGS.size_to_binarize,
                                     lambda: gradient,
                                     lambda: tf.where(where_cond, sign_gradient * scaler, zeros))
        binarized_gradients.append(binarized_gradient)
    return list(zip(binarized_gradients, variables))
def average_gradients2(tower_grads):
    """Identical to average_gradients(), but returns pairs of (shared gradient, unshared variable)
    across all towers.
    Note that this function provides a synchronization point across all towers.
    Args:
      tower_grads: List of lists of (gradient, variable) tuples. The outer list
        is over individual gradients. The inner list is over the gradient
        calculation for each tower.
    Returns:
      List of lists of (gradient, variable) pairs, where the gradient has been averaged
      across all towers and the variable is the one in each tower.
    """
    res = []
    mean_grads = average_gradients(tower_grads)
    for grad_and_vars in tower_grads:
        _grads = []
        for _grad1, _grad2 in zip(mean_grads, grad_and_vars):
            _grads.append((_grad1[0], _grad2[1]))
        res.append(_grads)
    return res
def huber_loss(y_true, y_pred, clip_value):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
    # for details.
    assert clip_value > 0.

    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems
        # if we compare `K.abs(x) < np.inf`.
        return .5 * tf.square(x)

    condition = tf.abs(x) < clip_value
    squared_loss = .5 * tf.square(x)
    linear_loss = clip_value * (tf.abs(x) - .5 * clip_value)
    return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
def safe_div(numerator, denominator, name='safe_div'):
    """Divides two values, returning 0 if the denominator is <= 0.
    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      0 if `denominator` <= 0, else `numerator` / `denominator`
    """
    return tf.where(
        tf.greater(denominator, 0),
        tf.truediv(numerator, denominator),
        tf.zeros_like(numerator),
        name=name)
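A minimal usage sketch (assuming TensorFlow 1.x): the entry with a zero denominator yields 0 instead of inf.

import tensorflow as tf

num = tf.constant([1.0, 2.0, 3.0])
den = tf.constant([2.0, 0.0, 4.0])
with tf.Session() as sess:
    print(sess.run(safe_div(num, den)))   # [0.5, 0.0, 0.75]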
def _l1_smooth_loss(self, y_true, y_pred):
    """Compute L1-smooth loss.
    # Arguments
        y_true: Ground truth bounding boxes,
            tensor of shape (?, num_boxes, 4).
        y_pred: Predicted bounding boxes,
            tensor of shape (?, num_boxes, 4).
    # Returns
        l1_loss: L1-smooth loss, tensor of shape (?, num_boxes).
    # References
        https://arxiv.org/abs/1504.08083
    """
    abs_loss = tf.abs(y_true - y_pred)
    sq_loss = 0.5 * (y_true - y_pred)**2
    l1_loss = tf.where(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
    return tf.reduce_sum(l1_loss, -1)
# Source: tensorflow_backend.py (project: deep-learning-keras-projects, author: jasmeetsb)
def categorical_crossentropy(output, target, from_logits=False):
    """Categorical crossentropy between an output tensor
    and a target tensor, where the target is a tensor of the same
    shape as the output.
    """
    # Note: tf.nn.softmax_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # scale preds so that the class probas of each sample sum to 1
        output /= tf.reduce_sum(output,
                                reduction_indices=len(output.get_shape()) - 1,
                                keep_dims=True)
        # manual computation of crossentropy
        epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
        output = tf.clip_by_value(output, epsilon, 1. - epsilon)
        return - tf.reduce_sum(target * tf.log(output),
                               reduction_indices=len(output.get_shape()) - 1)
    else:
        try:
            return tf.nn.softmax_cross_entropy_with_logits(labels=target,
                                                           logits=output)
        except TypeError:
            return tf.nn.softmax_cross_entropy_with_logits(output, target)
def filter_roidb(roidb):
    """Remove roidb entries that have no usable RoIs."""

    def is_valid(entry):
        # Valid images have:
        #   (1) At least one foreground RoI OR
        #   (2) At least one background RoI
        overlaps = entry['max_overlaps']
        # find boxes with sufficient overlap
        fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                           (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
        # image is only valid if such boxes exist
        valid = len(fg_inds) > 0 or len(bg_inds) > 0
        return valid

    num = len(roidb)
    filtered_roidb = [entry for entry in roidb if is_valid(entry)]
    num_after = len(filtered_roidb)
    print('Filtered {} roidb entries: {} -> {}'.format(num - num_after,
                                                       num, num_after))
    return filtered_roidb
def extract_dense_weights(sess):
    for key in dense_layers.keys():
        layer = dense_layers[key]

        # sparse kernel
        dense_kernel = layer.kernel
        dense_kernel_shape = dense_kernel.get_shape().as_list()
        # dense_kernel = tf.reshape(dense_kernel, [dense_kernel_shape[0] * dense_kernel_shape[1] * dense_kernel_shape[2],
        #                                          dense_kernel_shape[3]])
        # dense_kernel = tf.transpose(dense_kernel)
        idx = tf.where(tf.not_equal(dense_kernel, 0))
        sparse_kernel = tf.SparseTensor(idx, tf.gather_nd(dense_kernel, idx), dense_kernel.get_shape())

        if layer.bias is not None:
            dk, k, b = sess.run([dense_kernel, sparse_kernel, layer.bias])
        else:
            dk, k = sess.run([dense_kernel, sparse_kernel])
            b = None

        dense_weights['%s/%s' % (key, 'kernel_dense')] = dk
        dense_weights['%s/%s' % (key, 'kernel')] = k
        dense_weights['%s/%s' % (key, 'kernel_shape')] = dense_kernel_shape
        dense_weights['%s/%s' % (key, 'bias')] = b
def assign_boxes(gt_boxes, tensors, layers, scope='AssignGTBoxes'):
    with tf.name_scope(scope) as sc:
        min_k = layers[0]
        max_k = layers[-1]
        assigned_layers = \
            tf.py_func(assign.assign_boxes,
                       [gt_boxes, min_k, max_k],
                       tf.int32)
        assigned_layers = tf.reshape(assigned_layers, [-1])

        assigned_tensors = []
        for t in tensors:
            split_tensors = []
            for l in layers:
                tf.cast(l, tf.int32)
                inds = tf.where(tf.equal(assigned_layers, l))
                inds = tf.reshape(inds, [-1])
                split_tensors.append(tf.gather(t, inds))
            assigned_tensors.append(split_tensors)
        return assigned_tensors + [assigned_layers]
def bbox_to_mask(bbox, region_size, output_size, dtype=tf.float32):
    """Creates a binary mask of size `region_size` where the rectangle given by
    `bbox` is filled with ones and the rest is zeros. Finally, the binary mask
    is resized to `output_size` with bilinear interpolation.
    :param bbox: tensor of shape (..., 4)
    :param region_size: tensor of shape (..., 2)
    :param output_size: 2-tuple of ints
    :param dtype: tf.dtype
    :return: a tensor of shape = (..., output_size)
    """
    shape = tf.concat(axis=0, values=(tf.shape(bbox)[:-1], output_size))
    bbox = tf.reshape(bbox, (-1, 4))
    region_size = tf.reshape(region_size, (-1, 2))

    def create_mask(args):
        yy, region_size = args
        return _bbox_to_mask_fixed_size(yy, region_size, output_size, dtype)

    mask = tf.map_fn(create_mask, (bbox, region_size), dtype=dtype)
    return tf.reshape(mask, shape)
def iou(self, target_bbox, presence, per_timestep=False, reduce=True, start_t=1):
    pred_bbox, target_bbox, presence = [i[start_t:] for i in (self.pred_bbox, target_bbox, presence)]
    if not per_timestep:
        return _loss.intersection_over_union(pred_bbox, target_bbox, presence)
    else:
        iou = _loss.intersection_over_union(pred_bbox, target_bbox, reduce=False)
        iou = tf.where(presence, iou, tf.zeros_like(iou))
        iou = tf.reduce_sum(iou, (1, 2))
        p = tf.reduce_sum(tf.to_float(presence), (1, 2))

        if reduce:
            p = tf.maximum(p, tf.ones(tf.shape(presence)[0]))
            iou /= p
            return iou
        else:
            return iou, p
def __match_with_labels(self, gt_anchor_labels, gt_anchor_bboxes, gt_anchor_scores,
                        jaccard, matching_threshold, gt_labels, gt_bboxes, num_anchors):
    # debugging info
    # jaccard = tf.Print(jaccard, [gt_labels], "gt_labels")

    # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5).
    mask = tf.reduce_max(jaccard, axis=0) > matching_threshold
    mask_inds = tf.argmax(jaccard, axis=0)
    matched_labels = tf.gather(gt_labels, mask_inds)
    gt_anchor_labels = tf.where(mask, matched_labels, gt_anchor_labels)
    gt_anchor_bboxes = tf.where(mask, tf.gather(gt_bboxes, mask_inds), gt_anchor_bboxes)
    gt_anchor_scores = tf.reduce_max(jaccard, axis=0)

    # matching each ground truth box to the default box with the best jaccard overlap
    use_no_miss = True
    if use_no_miss:
        gt_anchor_labels, gt_anchor_bboxes, gt_anchor_scores = self.__match_no_miss(
            gt_anchor_labels, gt_anchor_bboxes, gt_anchor_scores, jaccard,
            gt_labels, gt_bboxes, num_anchors)

    return gt_anchor_labels, gt_anchor_bboxes, gt_anchor_scores
def selu(x, alpha=None, scale=None, name='selu', outputs_collections=None, **unused):
    """
    Computes selu
    Args:
        x: a `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, `int16`, or `int8`.
        alpha: float, selu parameter calculated from fixed points
        scale: float, selu parameter calculated from fixed points
        name: an optional scope/name of the layer
        outputs_collections: The collections to which the outputs are added.
    Returns:
        A `Tensor` representing the results of the selu activation operation.
    """
    _check_unused(unused, name)
    with tf.name_scope(name):
        if None in (alpha, scale):
            # using parameters from 0 mean, unit variance points
            alpha = 1.6732632423543772848170429916717
            scale = 1.0507009873554804934193349852946
        output = scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
        return _collect_named_outputs(outputs_collections, name, output)
def constrain_logits(self, logits, curr_state):
    with tf.name_scope('constrain_logits'):
        allowed_tokens = tf.gather(tf.constant(self.allowed_token_matrix), curr_state)
        assert allowed_tokens.get_shape()[1:] == (self.output_size,)
        constrained_logits = tf.where(allowed_tokens, logits, tf.fill(tf.shape(allowed_tokens), -1e+10))
        return constrained_logits
# Source: beam_aligner.py (project: almond-nnparser, author: Stanford-Mobisocial-IoT-Lab)
def _beam_where(self, cond, x, y):
    assert x.shape.is_compatible_with(y.shape)
    original_static_shape = x.shape
    cond = tf.reshape(cond, [self.batch_size * self._beam_width])
    x = self._merge_batch_beams(x, original_static_shape[2:])
    y = self._merge_batch_beams(y, original_static_shape[2:])
    return self._split_batch_beams(tf.where(cond, x, y), original_static_shape[2:])
def create_model(self,
                 model_input,
                 vocab_size,
                 num_frames,
                 **unused_params):
    shape = model_input.get_shape().as_list()
    frames_sum = tf.reduce_sum(tf.abs(model_input), axis=2)
    frames_true = tf.ones(tf.shape(frames_sum))
    frames_false = tf.zeros(tf.shape(frames_sum))
    frames_bool = tf.reshape(tf.where(tf.greater(frames_sum, frames_false), frames_true, frames_false),
                             [-1, shape[1], 1])

    activation_1 = tf.reduce_max(model_input, axis=1)
    activation_2 = tf.reduce_sum(model_input * frames_bool, axis=1) / (tf.reduce_sum(frames_bool, axis=1) + 1e-6)
    activation_3 = tf.reduce_min(model_input, axis=1)

    model_input_1, final_probilities_1 = self.sub_moe(activation_1, vocab_size, scopename="_max")
    model_input_2, final_probilities_2 = self.sub_moe(activation_2, vocab_size, scopename="_mean")
    model_input_3, final_probilities_3 = self.sub_moe(activation_3, vocab_size, scopename="_min")

    final_probilities = tf.stack((final_probilities_1, final_probilities_2, final_probilities_3), axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[shape[2], 3, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    activations = tf.stack((model_input_1, model_input_2, model_input_3), axis=2)
    weight = tf.nn.softmax(tf.einsum("aij,ijk->ajk", activations, weight2d), dim=1)

    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
    with tf.name_scope("loss_mix"):
        float_labels = tf.cast(labels, tf.float32)
        if FLAGS.support_type == "class":
            seq = np.loadtxt(FLAGS.class_file)
            tf_seq = tf.one_hot(tf.constant(seq, dtype=tf.int32), FLAGS.encoder_size)
            float_classes_org = tf.matmul(float_labels, tf_seq)
            class_true = tf.ones(tf.shape(float_classes_org))
            class_false = tf.zeros(tf.shape(float_classes_org))
            float_classes = tf.where(tf.greater(float_classes_org, class_false), class_true, class_false)
            cross_entropy_class = self.calculate_loss(predictions_class, float_classes)
        elif FLAGS.support_type == "frequent":
            float_classes = float_labels[:, 0:FLAGS.encoder_size]
            cross_entropy_class = self.calculate_loss(predictions_class, float_classes)
        elif FLAGS.support_type == "encoder":
            float_classes = float_labels
            for i in range(FLAGS.encoder_layers):
                var_i = np.loadtxt(FLAGS.autoencoder_dir + 'autoencoder_layer%d.model' % i)
                weight_i = tf.constant(var_i[:-1, :], dtype=tf.float32)
                bias_i = tf.reshape(tf.constant(var_i[-1, :], dtype=tf.float32), [-1])
                float_classes = tf.nn.xw_plus_b(float_classes, weight_i, bias_i)
                if i < FLAGS.encoder_layers - 1:
                    float_classes = tf.nn.relu(float_classes)
                else:
                    float_classes = tf.nn.sigmoid(float_classes)
                    # float_classes = tf.nn.relu(tf.sign(float_classes - 0.5))
            cross_entropy_class = self.calculate_mseloss(predictions_class, float_classes)
        else:
            float_classes = float_labels
            for i in range(FLAGS.moe_layers - 1):
                float_classes = tf.concat((float_classes, float_labels), axis=1)
            cross_entropy_class = self.calculate_loss(predictions_class, float_classes)
        cross_entropy_loss = self.calculate_loss(predictions, labels)
        return cross_entropy_loss + 0.1 * cross_entropy_class