def calculate_loss(self, predictions, labels, b=1.0, **unused_params):
    with tf.name_scope("loss_hinge"):
        float_labels = tf.cast(labels, tf.float32)
        all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32)
        all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32)
        # Map {0, 1} labels to {-1, +1}.
        sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones)
        # Per-class hinge max(0, b - y * p), summed over classes and averaged over the batch.
        hinge_loss = tf.maximum(
            all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions)
        return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1))
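This loss first maps {0, 1} labels to {-1, +1} via 2 * labels - 1, then takes max(0, b - y * p) per class. A minimal sketch of the same computation as a free function on a toy batch, assuming TF 1.x session execution (the class wrapper is omitted):

import tensorflow as tf

def hinge_loss(predictions, labels, b=1.0):
    # Same computation as above, written as a standalone function.
    float_labels = tf.cast(labels, tf.float32)
    sign_labels = 2.0 * float_labels - 1.0                    # {0, 1} -> {-1, +1}
    per_class = tf.maximum(0.0, b - sign_labels * predictions)
    return tf.reduce_mean(tf.reduce_sum(per_class, 1))

preds = tf.constant([[0.8, -0.5], [0.2, 0.9]])
labels = tf.constant([[1, 0], [0, 1]])
with tf.Session() as sess:
    print(sess.run(hinge_loss(preds, labels)))  # 1.0 for this toy batch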
def loss(logits, flos):
    """Adds the endpoint error (EPE) between predicted flows and the ground truth,
    with a different weight for each prediction scale.
    Args:
      logits: Predicted flows from inference().
      flos: Ground-truth flows.
    Returns:
      Loss tensor of type float.
    """
    x = logits[0]
    y = tf.image.resize_images(flos, get_size(x))
    flow6_loss = tf.scalar_mul(0.32, tf.reduce_mean(compute_euclidean_distance(x, y)))
    x = logits[1]
    y = tf.image.resize_images(flos, get_size(x))
    flow5_loss = tf.scalar_mul(0.08, tf.reduce_mean(compute_euclidean_distance(x, y)))
    x = logits[2]
    y = tf.image.resize_images(flos, get_size(x))
    flow4_loss = tf.scalar_mul(0.02, tf.reduce_mean(compute_euclidean_distance(x, y)))
    x = logits[3]
    y = tf.image.resize_images(flos, get_size(x))
    flow3_loss = tf.scalar_mul(0.01, tf.reduce_mean(compute_euclidean_distance(x, y)))
    x = logits[4]
    y = tf.image.resize_images(flos, get_size(x))
    flow2_loss = tf.scalar_mul(0.005, tf.reduce_mean(compute_euclidean_distance(x, y)))
    tf.add_to_collection('losses', tf.add_n([flow6_loss, flow5_loss, flow4_loss, flow3_loss, flow2_loss]))
    """
    x = logits
    y = tf.image.resize_images(flos, get_size(x))
    flow6_loss = tf.scalar_mul(0.32, tf.reduce_mean(compute_euclidean_distance(x, y)))
    tf.add_to_collection('losses', flow6_loss)
    """
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
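get_size and compute_euclidean_distance are helpers defined elsewhere in that project. A minimal sketch of what they plausibly look like, assuming NHWC flow tensors and per-pixel L2 endpoint error (hypothetical implementations, not the originals):

def get_size(flow):
    # Spatial size [height, width] of an NHWC tensor (assumed layout).
    shape = flow.get_shape().as_list()
    return [shape[1], shape[2]]

def compute_euclidean_distance(x, y):
    # Per-pixel endpoint error: L2 norm of the flow difference over the channel axis.
    return tf.sqrt(tf.reduce_sum(tf.square(x - y), axis=3))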
def test_scalar_mul(self):
    t = tf.scalar_mul(1, tf.Variable(self.random(3, 5)))
    self.check(t)
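As a reminder of the API this test exercises, tf.scalar_mul(scalar, x) takes the scalar first and the tensor second; the scalar may be a Python number or a 0-D tensor. A minimal standalone check in TF 1.x session style:

import tensorflow as tf

x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
y = tf.scalar_mul(2.0, x)          # elementwise 2 * x
with tf.Session() as sess:
    print(sess.run(y))             # [[2. 4.] [6. 8.]]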
def main(_):
    print('loading word embeddings from %s' % FLAGS.embedding_file)
    weight_matrix, word_idx = sentiment.load_embeddings(FLAGS.embedding_file)

    train_file = os.path.join(FLAGS.tree_dir, 'train.txt')
    print('loading training trees from %s' % train_file)
    train_trees = sentiment.load_trees(train_file)
    dev_file = os.path.join(FLAGS.tree_dir, 'dev.txt')
    print('loading dev trees from %s' % dev_file)
    dev_trees = sentiment.load_trees(dev_file)

    with tf.Session() as sess:
        print('creating the model')
        keep_prob = tf.placeholder_with_default(1.0, [])
        train_feed_dict = {keep_prob: FLAGS.keep_prob}
        word_embedding = sentiment.create_embedding(weight_matrix)
        compiler, metrics = sentiment.create_model(
            word_embedding, word_idx, FLAGS.lstm_num_units, keep_prob)
        loss = tf.reduce_sum(compiler.metric_tensors['all_loss'])
        opt = tf.train.AdagradOptimizer(FLAGS.learning_rate)
        grads_and_vars = opt.compute_gradients(loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var == word_embedding.weights:
                found += 1
                grad = tf.scalar_mul(FLAGS.embedding_learning_rate_factor, grad)
                grads_and_vars[i] = (grad, var)
        assert found == 1  # internal consistency check
        train = opt.apply_gradients(grads_and_vars)
        saver = tf.train.Saver()

        print('initializing tensorflow')
        sess.run(tf.global_variables_initializer())

        with compiler.multiprocessing_pool():
            print('training the model')
            train_set = compiler.build_loom_inputs(train_trees)
            dev_feed_dict = compiler.build_feed_dict(dev_trees)
            dev_hits_best = 0.0
            for epoch, shuffled in enumerate(td.epochs(train_set, FLAGS.epochs), 1):
                train_loss = 0.0
                for batch in td.group_by_batches(shuffled, FLAGS.batch_size):
                    train_feed_dict[compiler.loom_input_tensor] = batch
                    _, batch_loss = sess.run([train, loss], train_feed_dict)
                    train_loss += batch_loss
                dev_metrics = sess.run(metrics, dev_feed_dict)
                dev_loss = dev_metrics['all_loss']
                dev_accuracy = ['%s: %.2f' % (k, v * 100) for k, v in
                                sorted(dev_metrics.items()) if k.endswith('hits')]
                print('epoch:%4d, train_loss: %.3e, dev_loss: %.3e, dev_accuracy: [%s]'
                      % (epoch, train_loss, dev_loss, ' '.join(dev_accuracy)))
                dev_hits = dev_metrics['root_hits']
                if dev_hits > dev_hits_best:
                    dev_hits_best = dev_hits
                    save_path = saver.save(sess, FLAGS.checkpoint_base, global_step=epoch)
                    print('model saved in file: %s' % save_path)
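The tf.scalar_mul call above implements a common pattern: give one variable (here the embedding matrix) an effectively smaller learning rate by scaling its gradient before apply_gradients. A minimal sketch of the pattern in isolation, with made-up variable names and a hypothetical 0.1 factor:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
embedding = tf.Variable(tf.random_normal([4, 8]), name='embedding')
dense = tf.Variable(tf.random_normal([8, 1]), name='dense')
loss = tf.reduce_mean(tf.square(tf.matmul(tf.matmul(x, embedding), dense)))

opt = tf.train.AdagradOptimizer(0.05)
grads_and_vars = opt.compute_gradients(loss)
# Shrink only the embedding gradient; all other variables train at the full rate.
grads_and_vars = [(tf.scalar_mul(0.1, g), v) if v is embedding else (g, v)
                  for g, v in grads_and_vars]
train_op = opt.apply_gradients(grads_and_vars)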
def photoAugmentation(source, target, mean):
    """
    Photometric augmentation: contrast and brightness changes, per-channel color
    and gamma changes, and additive Gaussian noise.
    """
    num_batch = source.get_shape()[0].value
    height = source.get_shape()[1].value
    width = source.get_shape()[2].value

    photo_source_list = []
    photo_target_list = []
    for batch_idx in xrange(num_batch):
        img0 = source[batch_idx, :, :, :]
        img1 = target[batch_idx, :, :, :]

        # Contrast and brightness change
        contrast = tf.random_uniform([], minval=-0.3, maxval=0.3)
        contrast = contrast + 1.0
        bright_sigma = 0.2  # tf.random_uniform([], minval=0.0, maxval=0.2)
        brightnessImage = tf.random_normal([height, width, 3], mean=0.0, stddev=bright_sigma, dtype=tf.float32)
        img0_contrast = tf.add(tf.scalar_mul(contrast, img0), brightnessImage)
        img1_contrast = tf.add(tf.scalar_mul(contrast, img1), brightnessImage)

        # Color change, may be bad for unsupervised learning
        color_change_B = tf.random_uniform([], minval=0.9, maxval=1.1)
        color_change_G = tf.random_uniform([], minval=0.9, maxval=1.1)
        color_change_R = tf.random_uniform([], minval=0.9, maxval=1.1)
        img0_color_B = tf.scalar_mul(color_change_B, img0_contrast[:, :, 0])
        img0_color_G = tf.scalar_mul(color_change_G, img0_contrast[:, :, 1])
        img0_color_R = tf.scalar_mul(color_change_R, img0_contrast[:, :, 2])
        img0_color = tf.pack([img0_color_B, img0_color_G, img0_color_R], axis=2)
        img1_color_B = tf.scalar_mul(color_change_B, img1_contrast[:, :, 0])
        img1_color_G = tf.scalar_mul(color_change_G, img1_contrast[:, :, 1])
        img1_color_R = tf.scalar_mul(color_change_R, img1_contrast[:, :, 2])
        img1_color = tf.pack([img1_color_B, img1_color_G, img1_color_R], axis=2)
        img0_color = tf.clip_by_value(img0_color, 0.0, 1.0)
        img1_color = tf.clip_by_value(img1_color, 0.0, 1.0)

        # Gamma
        gamma = tf.random_uniform([], minval=0.7, maxval=1.5)
        gamma_inv = tf.inv(gamma)
        img0_gamma = tf.pow(img0_color, gamma_inv)
        img1_gamma = tf.pow(img1_color, gamma_inv)

        # Additive gaussian noise
        sigma = tf.random_uniform([], minval=0.0, maxval=0.04)
        noiseImage = tf.random_normal([height, width, 3], mean=0.0, stddev=sigma, dtype=tf.float32)
        img0_noise = tf.add(img0_gamma, noiseImage)
        img1_noise = tf.add(img1_gamma, noiseImage)

        # Subtract mean
        img0_mean = tf.sub(img0_noise, tf.truediv(mean, 255.0))
        img1_mean = tf.sub(img1_noise, tf.truediv(mean, 255.0))
        photo_source_list.append(img0_mean)
        photo_target_list.append(img1_mean)

    return tf.pack(photo_source_list, axis=0), tf.pack(photo_target_list, axis=0)
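Note that this snippet targets pre-1.0 TensorFlow and Python 2: tf.pack, tf.sub and tf.inv correspond to tf.stack, tf.subtract and tf.reciprocal in later releases, and xrange to range. A hypothetical call, assuming NHWC image batches scaled to [0, 1] and a per-channel mean given in [0, 255] (shapes and mean values here are made up for illustration):

source = tf.placeholder(tf.float32, [8, 384, 512, 3])   # first frames of the pairs
target = tf.placeholder(tf.float32, [8, 384, 512, 3])   # second frames of the pairs
mean = tf.constant([104.0, 117.0, 123.0])                # assumed per-channel mean in [0, 255]
aug_source, aug_target = photoAugmentation(source, target, mean)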
def create_cost_bending(tn):
    # Negative mean dot product of consecutive unit tangents:
    # the straighter the curve, the lower the cost.
    tn_shape = tn.get_shape().as_list();
    tn1 = tf.slice(tn, [1,0], [-1,-1]);
    tn2 = tf.slice(tn, [0,0], [tn_shape[0]-1,-1]);
    dp = tf.reduce_sum(tf.mul(tn1, tn2), reduction_indices = 1);
    return tf.scalar_mul(-1.0, tf.reduce_mean(dp));
#def create_cost_side(s, b, length = 1.0, weight_spacing = 1.0, weight_bending = 1.0):
# cost = create_cost_soft_min_distance(s, b);
# if weight_spacing != 0:
# cost_spacing = create_cost_spacing(s, length);
# cost = tf.add(cost, tf.mul(tf.constant(weight_spacing, "float32"), cost_spacing));
# if weight_bending != 0:
# cost_bending = create_cost_bending(s);
# cost = tf.add(cost, tf.mul(tf.constant(weight_bending, "float32"), cost_bending));
# return cost;
#def create_cost(c, w, b, nb, length, weight_spacing = 1.0, weight_bending = 1.0, gamma = 1.0, kappa = 2.0):
# #tangents
# t = create_tangent(c);
# tn = create_normalize_tangent(t);
# nl = create_normal(tn);
# nr = tf.scalar_mul(-1.0, nl);
#
# l,r = create_left_right(c,w,nl);
#
# cost_left = create_cost_soft_min_aligned_distance(l, b, nl, nb, k = kappa, gamma = gamma);
# cost_right= create_cost_soft_min_aligned_distance(r, b, nr, nb, k = kappa, gamma = gamma);
# cost = tf.add(cost_left, cost_right);
#
# #spacing and bending
# if weight_spacing != 0:
# cost_spacing = tf.scalar_mul(weight_spacing, create_cost_spacing(t, length));
# cost = tf.add(cost, cost_spacing);
# else:
# cost_spacing = tf.constant(0);
#
# if weight_bending != 0:
# cost_bending = tf.scalar_mul(weight_bending, create_cost_bending(tn));
# cost = tf.add(cost, cost_bending);
# else:
# cost_bending = tf.constant(0);
#
# return (cost, cost_left, cost_right, cost_spacing, cost_bending, nl, l, r);
def create_cost(center, xy, width, persitaltic, bend, bend_profiles, contour, contour_normals, length, weight_spacing = 1.0, weight_bending = 1.0, gamma = 1.0, kappa = 2.0):
    c = tf.add(center, xy);
    #tangent and normals
    t = create_tangent(c);
    tn = create_normalize_tangent(t);
    ta = create_average_tangent(tn);
    nl = create_normal(ta);
    #bend the center
    c = create_bend(center, nl, bend, bend_profiles);
    #peristaltically move the center
    c = create_peristaltic(c, ta, persitaltic);
    #tangents
    t = create_tangent(c);
    tn = create_normalize_tangent(t);
    ta = create_average_tangent(tn);
    nl = create_normal(ta);
    nr = tf.scalar_mul(-1.0, nl);
    l,r = create_left_right(c,width,nl);
    cost_left = create_cost_soft_min_aligned_distance(l, contour, nl, contour_normals, k = kappa, gamma = gamma);
    cost_right= create_cost_soft_min_aligned_distance(r, contour, nr, contour_normals, k = kappa, gamma = gamma);
    cost = tf.add(cost_left, cost_right);
    #spacing and bending
    if weight_spacing != 0:
        cost_spacing = tf.scalar_mul(weight_spacing, create_cost_spacing(t, length));
        cost = tf.add(cost, cost_spacing);
    else:
        cost_spacing = tf.constant(0);
    if weight_bending != 0:
        cost_bending = tf.scalar_mul(weight_bending, create_cost_bending(tn));
        cost = tf.add(cost, cost_bending);
    else:
        cost_bending = tf.constant(0);
    return (cost, cost_left, cost_right, cost_spacing, cost_bending, c, l, r, nl);
def get_mask_file(origin_images, mask_file, height, width, channels=3):
    """blur image through a mask file"""
    img_bytes = tf.read_file(mask_file)
    maskimage = tf.image.decode_jpeg(img_bytes)
    maskimage = tf.to_float(maskimage)
    m_mean = tf.reduce_mean(maskimage, axis=(1,2))
    index = tf.where(m_mean < 1.5)
    side_index = tf.where(m_mean >= 1.5)
    top_index = side_index + tf.to_int64(1)
    down_index = side_index - tf.to_int64(1)

    select = tf.zeros_like(m_mean, dtype=tf.float32)
    side_select = tf.ones_like(m_mean, dtype=tf.float32)
    values = tf.squeeze(tf.ones_like(index, dtype=tf.float32))
    side_values = tf.squeeze(tf.ones_like(side_index, dtype=tf.float32))
    top_values = tf.scalar_mul(tf.random_uniform([], minval=0, maxval=1), side_values)
    down_values = tf.scalar_mul(tf.random_uniform([], minval=0, maxval=1), side_values)
    delta = tf.SparseTensor(index, values, [height])
    top_delta = tf.SparseTensor(top_index, top_values, [height])
    down_delta = tf.SparseTensor(down_index, down_values, [height])

    black_select = select + tf.sparse_tensor_to_dense(delta)
    top_select = side_select + tf.sparse_tensor_to_dense(top_delta)
    down_select = side_select + tf.sparse_tensor_to_dense(down_delta)
    top_select = tf.expand_dims(tf.divide(tf.ones_like(top_select), top_select), -1)
    top_select = tf.matmul(top_select, tf.ones([1, width]))
    top_select = tf.expand_dims(top_select, -1)
    down_select = tf.expand_dims(tf.divide(tf.ones_like(down_select), down_select), -1)
    down_select = tf.matmul(down_select, tf.ones([1, width]))
    down_select = tf.expand_dims(down_select, -1)
    black_select = tf.expand_dims(black_select, -1)
    black_select = tf.matmul(black_select, tf.ones([1, width]))
    black_select = tf.expand_dims(black_select, 0)
    black_select = tf.expand_dims(black_select, -1)
    top_select = tf.expand_dims(top_select, 0)
    down_select = tf.expand_dims(down_select, 0)

    source = tf.mul(origin_images, top_select)
    source = tf.mul(source, down_select)
    source = tf.mul(source, black_select)
    return source
def __init__(self, steps=30, learning_rate=5e-4):
    def transition(z, step):
        with tf.variable_scope('transition') as vs:
            if step > 1:
                vs.reuse_variables()
            fc1 = tf.contrib.layers.fully_connected(z, 600, activation_fn=tf.identity)
            fc1 = tf.nn.relu(tf.contrib.layers.batch_norm(fc1))
            fc1 = tf.contrib.layers.fully_connected(fc1, 600, activation_fn=tf.identity)
            fc1 = tf.nn.relu(tf.contrib.layers.batch_norm(fc1))
            mu = tf.contrib.layers.fully_connected(fc1, 784, activation_fn=tf.sigmoid)
            sig = tf.contrib.layers.fully_connected(fc1, 784, activation_fn=tf.sigmoid)
            sig = tf.add(tf.div(sig, step ** 2), 1e-4)
            #sig = tf.add(tf.scalar_mul(0.1, sig), 1e-4)
            sig = tf.sqrt(sig)
            e = tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
            z_ = tf.add(mu, tf.mul(e, sig))
            z_ = tf.minimum(tf.maximum(0.0, z_), 1.0)
        return z_, mu, sig

    self.x = tf.placeholder(tf.float32, [None, 784])
    self.alpha = tf.placeholder(tf.float32, [])

    z = tf.random_normal(tf.shape(self.x), 0, 1, dtype=tf.float32)
    z = mnist_mean + mnist_std * z
    z = tf.minimum(tf.maximum(0.0, z), 1.0)
    self.rand_init = infuse(z, self.x, self.alpha)

    self.init = tf.placeholder(tf.float32, [None, 784])
    self.z = [self.init]
    z = self.z[0]
    self.loss = 0.0
    for t in range(1, steps + 1):
        z, mu, sig = transition(z, t)
        z = infuse(z, self.x, self.alpha * t)
        dist = tf.contrib.distributions.Normal(mu=mu, sigma=sig)
        #self.loss = self.loss + tf.reduce_mean(-dist.log_pdf(self.x))
        self.loss = self.loss + tf.scalar_mul(t / float(steps), tf.reduce_mean(-dist.log_pdf(self.x)))
        self.z.append(z)
    for t in range(steps + 1, steps * 2 + 1):
        z, mu, sig = transition(z, t)
        z = infuse(z, self.x, self.alpha * t)
        self.z.append(z)

    self.trainer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    self.saver = tf.train.Saver(tf.global_variables())
def __init__(self, dim_in, dim_hidden, dim_out, learning_rate, gate=Relu(),
             initializer=tf.random_normal_initializer(), bottom_layer=None, optimizer=None):
    self.learning_rate = learning_rate
    if optimizer is None:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
    if bottom_layer is None:
        self.x = tf.placeholder(tf.float32, shape=(None, dim_in))
        var_in = self.x
        trainable_vars = []
    else:
        self.x = bottom_layer.x
        var_in = bottom_layer.var_out
        trainable_vars = bottom_layer.trainable_vars
    self.h = tf.placeholder(tf.float32, shape=(dim_in, dim_hidden, dim_out))
    self.target = tf.placeholder(tf.float32, shape=(None, dim_out))
    U, b_hidden, net, phi, W, b_out, y = \
        crossprop_layer('crossprop', 'crossprop_layer', var_in, dim_in, dim_hidden, dim_out, gate.gate_fun, initializer)
    self.pred = tf.nn.softmax(y)
    ce_loss = tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=self.target)
    self.loss = tf.reduce_mean(ce_loss)
    self.total_loss = tf.reduce_sum(ce_loss)
    correct_prediction = tf.equal(tf.argmax(self.target, 1), tf.argmax(self.pred, 1))
    self.correct_labels = tf.reduce_sum(tf.cast(correct_prediction, "float"))

    delta = tf.subtract(self.pred, self.target)
    trainable_vars.extend([W, b_out])

    h_decay = tf.subtract(1.0, tf.scalar_mul(learning_rate, tf.pow(phi, 2)))
    h_decay = tf.reshape(tf.tile(h_decay, [1, tf.shape(self.h)[0]]), [-1, tf.shape(self.h)[0], tf.shape(self.h)[1]])
    h_decay = tf.reduce_sum(h_decay, axis=0)
    self.h_decay = tf.reshape(h_decay, [tf.shape(h_decay)[0], tf.shape(h_decay)[1], 1])

    phi_grad_x = tf.matmul(tf.transpose(var_in), gate.gate_fun_gradient(phi, net))
    phi_grad_x = tf.reshape(phi_grad_x, [tf.shape(phi_grad_x)[0], tf.shape(phi_grad_x)[1], 1])

    h_delta = tf.reshape(tf.tile(delta, [1, tf.shape(self.h)[0]]), [-1, tf.shape(self.h)[0], tf.shape(self.h)[1]])
    self.h_delta = tf.reduce_sum(h_delta, axis=0)

    new_grads = []
    phi_phi_grad = tf.multiply(phi, gate.gate_fun_gradient(phi, net))
    weight = tf.transpose(tf.matmul(self.h, tf.transpose(delta)))
    phi_phi_grad = tf.multiply(phi_phi_grad, weight)
    new_u_grad = tf.matmul(tf.transpose(var_in), phi_phi_grad)
    new_u_grad = tf.scalar_mul(1.0 / tf.cast(tf.shape(var_in)[0], tf.float32), new_u_grad)
    new_grads.append(new_u_grad)
    new_b_hidden_grad = tf.reduce_mean(phi_phi_grad, axis=0)
    new_grads.append(new_b_hidden_grad)

    old_grads = optimizer.compute_gradients(self.loss, var_list=[U, b_hidden])
    for i, (grad, var) in enumerate(old_grads):
        old_grads[i] = (new_grads[i], var)
    other_grads = optimizer.compute_gradients(self.loss, var_list=trainable_vars)
    self.all_gradients = old_grads + other_grads
    self.train_op = optimizer.apply_gradients(self.all_gradients)

    self.h_var = np.zeros((dim_hidden, dim_out))
    self.other_info = [self.total_loss, self.correct_labels, self.h_decay, self.h_delta]
def compute_loss(self, scores, scores_no_dropout, labels):
    loss = tf.constant(0.0)
    if self.viterbi:
        zero_elements = tf.equal(self.sequence_lengths, tf.zeros_like(self.sequence_lengths))
        count_zeros_per_row = tf.reduce_sum(tf.to_int32(zero_elements), axis=1)
        flat_sequence_lengths = tf.add(tf.reduce_sum(self.sequence_lengths, 1),
                                       tf.scalar_mul(2, count_zeros_per_row))
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(scores, labels, flat_sequence_lengths,
                                                                              transition_params=self.transition_params)
        loss += tf.reduce_mean(-log_likelihood)
    else:
        if self.which_loss == "mean" or self.which_loss == "block":
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores, labels=labels)
            masked_losses = tf.multiply(losses, self.input_mask)
            loss += tf.div(tf.reduce_sum(masked_losses), tf.reduce_sum(self.input_mask))
        elif self.which_loss == "sum":
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores, labels=labels)
            masked_losses = tf.multiply(losses, self.input_mask)
            loss += tf.reduce_sum(masked_losses)
        elif self.which_loss == "margin":
            # todo put into utils
            # also todo put idx-into-3d as sep func
            flat_labels = tf.reshape(labels, [-1])
            batch_offsets = tf.multiply(tf.range(self.batch_size), self.max_seq_len * self.num_classes)
            repeated_batch_offsets = tf_utils.repeat(batch_offsets, self.max_seq_len)
            tok_offsets = tf.multiply(tf.range(self.max_seq_len), self.num_classes)
            tiled_tok_offsets = tf.tile(tok_offsets, [self.batch_size])
            indices = tf.add(tf.add(repeated_batch_offsets, tiled_tok_offsets), flat_labels)
            # scores w/ true label set to -inf
            sparse = tf.sparse_to_dense(indices, [self.batch_size * self.max_seq_len * self.num_classes], np.NINF)
            loss_augmented_flat = tf.add(tf.reshape(scores, [-1]), sparse)
            loss_augmented = tf.reshape(loss_augmented_flat, [self.batch_size, self.max_seq_len, self.num_classes])
            # maxes excluding true label
            max_scores = tf.reshape(tf.reduce_max(loss_augmented, [-1]), [-1])
            sparse = tf.sparse_to_dense(indices, [self.batch_size * self.max_seq_len * self.num_classes],
                                        -self.margin)
            loss_augmented_flat = tf.add(tf.reshape(scores, [-1]), sparse)
            label_scores = tf.gather(loss_augmented_flat, indices)
            # margin + max_logit - correct_logit == max_logit - (correct - margin)
            max2_diffs = tf.subtract(max_scores, label_scores)
            mask = tf.reshape(self.input_mask, [-1])
            loss += tf.reduce_mean(tf.multiply(mask, tf.nn.relu(max2_diffs)))
    loss += self.l2_penalty * self.l2_loss
    drop_loss = tf.nn.l2_loss(tf.subtract(scores, scores_no_dropout))
    loss += self.drop_penalty * drop_loss
    return loss
def forward_step_fn(self, params, inputs):
    """
    Forward step over a batch, to be used in tf.scan
    :param params:
    :param inputs: (batch_size, variable dimensions)
    :return:
    """
    mu_pred, Sigma_pred, _, _, alpha, u, state, buffer, _, _, _ = params
    y = tf.slice(inputs, [0, 0], [-1, self.dim_y])                  # (bs, dim_y)
    _u = tf.slice(inputs, [0, self.dim_y], [-1, self.dim_u])        # (bs, dim_u)
    mask = tf.slice(inputs, [0, self.dim_y + self.dim_u], [-1, 1])  # (bs, 1)

    # Mixture of C
    C = tf.matmul(alpha, tf.reshape(self.C, [-1, self.dim_y*self.dim_z]))  # (bs, k) x (k, dim_y*dim_z)
    C = tf.reshape(C, [-1, self.dim_y, self.dim_z])                        # (bs, dim_y, dim_z)
    C.set_shape([Sigma_pred.get_shape()[0], self.dim_y, self.dim_z])

    # Residual
    y_pred = tf.squeeze(tf.matmul(C, tf.expand_dims(mu_pred, 2)))  # (bs, dim_y)
    r = y - y_pred                                                 # (bs, dim_y)

    # Project system uncertainty into measurement space
    S = tf.matmul(tf.matmul(C, Sigma_pred), C, transpose_b=True) + self.R  # (bs, dim_y, dim_y)
    S_inv = tf.matrix_inverse(S)
    K = tf.matmul(tf.matmul(Sigma_pred, C, transpose_b=True), S_inv)       # (bs, dim_z, dim_y)

    # For missing values, set the Kalman gain matrix to 0
    K = tf.multiply(tf.expand_dims(mask, 2), K)

    # Get current mu and Sigma
    mu_t = mu_pred + tf.squeeze(tf.matmul(K, tf.expand_dims(r, 2)))  # (bs, dim_z)
    I_KC = self._I - tf.matmul(K, C)                                 # (bs, dim_z, dim_z)
    Sigma_t = tf.matmul(tf.matmul(I_KC, Sigma_pred), I_KC, transpose_b=True) + self._sast(self.R, K)  # (bs, dim_z, dim_z)

    # Mixture of A
    alpha, state, u, buffer = self.alpha(tf.multiply(mask, y) + tf.multiply((1 - mask), y_pred), state, _u, buffer, reuse=True)  # (bs, k)
    A = tf.matmul(alpha, tf.reshape(self.A, [-1, self.dim_z*self.dim_z]))  # (bs, k) x (k, dim_z*dim_z)
    A = tf.reshape(A, [-1, self.dim_z, self.dim_z])                        # (bs, dim_z, dim_z)
    A.set_shape(Sigma_pred.get_shape())  # set shape to batch_size x dim_z x dim_z

    # Mixture of B
    B = tf.matmul(alpha, tf.reshape(self.B, [-1, self.dim_z*self.dim_u]))  # (bs, k) x (k, dim_z*dim_u)
    B = tf.reshape(B, [-1, self.dim_z, self.dim_u])                        # (bs, dim_z, dim_u)
    B.set_shape([A.get_shape()[0], self.dim_z, self.dim_u])

    # Prediction
    mu_pred = tf.squeeze(tf.matmul(A, tf.expand_dims(mu_t, 2))) + tf.squeeze(tf.matmul(B, tf.expand_dims(u, 2)))
    Sigma_pred = tf.scalar_mul(self._alpha_sq, tf.matmul(tf.matmul(A, Sigma_t), A, transpose_b=True) + self.Q)

    return mu_pred, Sigma_pred, mu_t, Sigma_t, alpha, u, state, buffer, A, B, C
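The docstring notes that this step function is meant to be driven by tf.scan, which threads the returned tuple back in as params at the next time step. A toy illustration of that contract (unrelated to the filter itself), showing how tf.scan carries a tuple of state tensors across steps:

import tensorflow as tf

def step(prev_state, x_t):
    # prev_state is whatever the previous call returned (or the initializer).
    running_sum, count = prev_state
    return running_sum + x_t, count + 1

xs = tf.constant([1.0, 2.0, 3.0, 4.0])
sums, counts = tf.scan(step, xs, initializer=(tf.constant(0.0), tf.constant(0)))
with tf.Session() as sess:
    print(sess.run(sums))    # [ 1.  3.  6. 10.]
    print(sess.run(counts))  # [1 2 3 4]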
def build_rmsprop_optimizer(loss, learning_rate, rmsprop_decay, rmsprop_constant, gradient_clip, version):
    with tf.name_scope('rmsprop'):
        optimizer = None
        if version == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate, decay=rmsprop_decay, momentum=0.0, epsilon=rmsprop_constant)
        elif version == 'graves_rmsprop':
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)

        grads_and_vars = optimizer.compute_gradients(loss)
        grads = []
        params = []
        for p in grads_and_vars:
            if p[0] is None:
                continue
            grads.append(p[0])
            params.append(p[1])
        #grads = [gv[0] for gv in grads_and_vars]
        #params = [gv[1] for gv in grads_and_vars]

        if gradient_clip > 0:
            grads = tf.clip_by_global_norm(grads, gradient_clip)[0]

        if version == 'rmsprop':
            return optimizer.apply_gradients(zip(grads, params))
        elif version == 'graves_rmsprop':
            square_grads = [tf.square(grad) for grad in grads]

            avg_grads = [tf.Variable(tf.zeros(var.get_shape()))
                         for var in params]
            avg_square_grads = [tf.Variable(
                tf.zeros(var.get_shape())) for var in params]

            update_avg_grads = [grad_pair[0].assign((rmsprop_decay * grad_pair[0]) + tf.scalar_mul((1 - rmsprop_decay), grad_pair[1]))
                                for grad_pair in zip(avg_grads, grads)]
            update_avg_square_grads = [grad_pair[0].assign((rmsprop_decay * grad_pair[0]) + ((1 - rmsprop_decay) * tf.square(grad_pair[1])))
                                       for grad_pair in zip(avg_square_grads, grads)]
            avg_grad_updates = update_avg_grads + update_avg_square_grads

            rms = [tf.sqrt(avg_grad_pair[1] - tf.square(avg_grad_pair[0]) + rmsprop_constant)
                   for avg_grad_pair in zip(avg_grads, avg_square_grads)]

            rms_updates = [grad_rms_pair[0] / grad_rms_pair[1]
                           for grad_rms_pair in zip(grads, rms)]
            train = optimizer.apply_gradients(zip(rms_updates, params))

            return tf.group(train, tf.group(*avg_grad_updates)), grads_and_vars