def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    ''' Adam optimizer '''
    updates = []
    if type(cost_or_grads) is not list:
        grads = tf.gradients(cost_or_grads, params)
    else:
        grads = cost_or_grads
    t = tf.Variable(1., 'adam_t')
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1*v + (1. - mom1)*g
            v_hat = v_t / (1. - tf.pow(mom1, t))
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        mg_t = mom2*mg + (1. - mom2)*tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2, t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)
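# Usage sketch (not from the original source): wiring adam_updates into a
# TensorFlow 1.x graph. The placeholder shape, the quadratic loss and the
# learning rate below are illustrative assumptions only.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3], name='x')
w = tf.Variable(tf.zeros([3, 1]), name='w')
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - 1.0))

train_op = adam_updates([w], loss, lr=1e-3)   # cost_or_grads may also be a list of gradients
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op, feed_dict={x: [[1., 2., 3.]]})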
def _MatMulGradMom(op, W, out_grad, batch_size, mom=2):
    """Computes gradient moment for a weight matrix through a MatMul operation.

    Assumes ``Z=tf.matmul(A, W)``, where ``W`` is a d1xd2 weight matrix, ``A``
    are the nxd1 activations of the previous layer (n being the batch size).
    ``out_grad`` is the gradient w.r.t. ``Z``, as computed by ``tf.gradients()``.
    No transposes in the MatMul operation allowed.

    Inputs:
        :op: The MatMul operation
        :W: The weight matrix (the tensor, not the variable)
        :out_grad: The tensor of the gradient w.r.t. the output of the op
        :batch_size: Batch size n (constant integer or scalar int tf.Tensor)
        :mom: Integer moment desired (defaults to 2)"""
    assert op.type == "MatMul"
    t_a, t_b = op.get_attr("transpose_a"), op.get_attr("transpose_b")
    assert W is op.inputs[1] and not t_a and not t_b

    A = op.inputs[0]
    out_grad_pow = tf.pow(out_grad, mom)
    A_pow = tf.pow(A, mom)
    return tf.multiply(batch_size, tf.matmul(A_pow, out_grad_pow, transpose_a=True))
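# Usage sketch (not from the original file): pulling the MatMul op behind a
# dense layer and asking for its second gradient moment. The shapes, the
# quadratic loss and the float batch size are illustrative assumptions; the
# cast keeps the final tf.multiply in float32.
import tensorflow as tf

A = tf.placeholder(tf.float32, [32, 4])      # n x d1 activations
W = tf.placeholder(tf.float32, [4, 2])       # d1 x d2 weight tensor
Z = tf.matmul(A, W)
loss = tf.reduce_sum(tf.square(Z))

out_grad = tf.gradients(loss, Z)[0]          # gradient w.r.t. the MatMul output
batch_size = tf.cast(tf.shape(A)[0], tf.float32)
grad_mom2 = _MatMulGradMom(Z.op, W, out_grad, batch_size, mom=2)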
def get_total_variation(x, shape):
    with tf.name_scope('get_total_variation'):
        # Get the dimensions of the variable image
        height = shape[1]
        width = shape[2]
        size = reduce(lambda a, b: a * b, shape) ** 2

        # Disjoin the variable image and evaluate the total variation
        x_cropped = x[:, :height - 1, :width - 1, :]
        left_term = tf.square(x[:, 1:, :width - 1, :] - x_cropped)
        right_term = tf.square(x[:, :height - 1, 1:, :] - x_cropped)
        smoothed_terms = tf.pow(left_term + right_term, TOTAL_VARIATION_SMOOTHING / 2.)
        return tf.reduce_sum(smoothed_terms) / size
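# Usage sketch (assumptions): the snippet above relies on a module-level
# TOTAL_VARIATION_SMOOTHING constant and on `reduce` (from functools in
# Python 3); both are supplied here with illustrative values, and the image
# tensor is a made-up placeholder.
from functools import reduce
import tensorflow as tf

TOTAL_VARIATION_SMOOTHING = 2.0              # assumed smoothing exponent
img_shape = [1, 64, 64, 3]
img = tf.placeholder(tf.float32, img_shape)
tv_loss = get_total_variation(img, img_shape)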
def connect_cores(input, output_dim, name):
    """Connect two cores given the inputs, synaptic weights, and output dimension.

    Inputs can be output from a previous core or spike inputs."""
    input_dim = int(input.get_shape()[1])
    s, axon_types, axon_weights = synapse_weight((input_dim, output_dim), name)
    b = leak_bias([output_dim], name)
    c = synapse_connection([input_dim, output_dim], name)
    xc = tf.reshape(input, (-1, input_dim, 1)) * c
    mu = b + tf.reduce_sum(xc * s, 1)
    sigma2 = tf.reduce_sum(xc * (1. - xc) * tf.pow(s, 2), 1)
    # Output is the probability that each neuron fires
    x0 = tf.zeros_like(mu)
    output = normal_ccdf(x0, mu, sigma2)
    return output, b, c, axon_types, axon_weights, s
def buildTVNorm(model):
    adjustedImage = model.bgr

    yPlusOne = tf.slice(adjustedImage, [0, 0, 1, 0], [1, imageShape[0], (imageShape[1] - 1), imageShape[2]])
    xPlusOne = tf.slice(adjustedImage, [0, 1, 0, 0], [1, (imageShape[0] - 1), imageShape[1], imageShape[2]])
    inputNoiseYadj = tf.slice(adjustedImage, [0, 0, 0, 0], [1, imageShape[0], (imageShape[1] - 1), imageShape[2]])
    inputNoiseXadj = tf.slice(adjustedImage, [0, 0, 0, 0], [1, (imageShape[0] - 1), imageShape[1], imageShape[2]])

    lambdaBeta = (sigma**beta) / (imageShape[0] * imageShape[1] * ((a * B)**beta))
    error1 = tf.slice(tf.square(yPlusOne - inputNoiseYadj), [0, 0, 0, 0], [1, (imageShape[0] - 1), (imageShape[1] - 1), imageShape[2]])
    error2 = tf.slice(tf.square(xPlusOne - inputNoiseXadj), [0, 0, 0, 0], [1, (imageShape[0] - 1), (imageShape[1] - 1), imageShape[2]])

    return lambdaBeta * tf.reduce_sum(tf.pow((error1 + error2), (beta / 2)))
def sharp_weights(self, after_conv_shift, sharp_gamma):
    """
    Sharpens the final weights.

    Parameters:
    ----------
    after_conv_shift: Tensor (batch_size, memory_locations, number_of_keys)
        weights after circular convolution
    sharp_gamma: Tensor (batch_size, number_of_keys)
        scalar to sharpen the final weights

    Returns: Tensor (batch_size, memory_locations, number_of_keys)
        final weights
    """
    sharp_gamma = tf.expand_dims(sharp_gamma, 1)
    powed_conv_w = tf.pow(after_conv_shift, sharp_gamma)
    return powed_conv_w / tf.expand_dims(tf.reduce_sum(powed_conv_w, 1), 1)
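# Minimal sketch (not from the original code) of the sharpening step in
# isolation: raising the weights to gamma > 1 and renormalising makes the
# distribution over memory locations peakier. Shapes follow the docstring
# above: (batch_size, memory_locations, number_of_keys).
import tensorflow as tf

w = tf.constant([[[0.1], [0.3], [0.6]]])     # (1, 3, 1) attention weights
gamma = tf.constant([[3.0]])                 # (1, 1) sharpening scalar per key
gamma = tf.expand_dims(gamma, 1)             # (1, 1, 1), as in sharp_weights
w_pow = tf.pow(w, gamma)
w_sharp = w_pow / tf.expand_dims(tf.reduce_sum(w_pow, 1), 1)

with tf.Session() as sess:
    print(sess.run(w_sharp))                 # ~[[[0.004], [0.111], [0.885]]]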
def kl_gaussian(mean_, logsigma,
                prior_mean=0., prior_logsigma=0.,
                regularizer_scale=1.):
    ''' KL-divergence between two gaussians.
    Useful for Variational AutoEncoders. Use this as an activation regularizer.

    Parameters:
    -----------
    mean_, logsigma: parameters of the input distribution
    prior_mean, prior_logsigma: parameters of the desired distribution (note the
        log on logsigma)
    regularizer_scale: rescales the regularization cost. Keep this 1 for most cases.

    Note
    ----
    Original implementation from seya:
    https://github.com/Philip-Bachman/ICML-2015/blob/master/LogPDFs.py
    Copyright (c) Philip Bachman
    '''
    gauss_klds = 0.5 * (prior_logsigma - logsigma +
                        ((tf.exp(logsigma) + pow((mean_ - prior_mean), 2.0)) / tf.exp(prior_logsigma)) - 1.0)
    return mean(gauss_klds)
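# Sanity-check sketch (assumption: `logsigma` here denotes a log-variance).
# For scalar inputs the expression above reduces to the closed-form KL between
# two univariate Gaussians, 0.5 * (log(s_p / s) + (s + (m - m_p)^2) / s_p - 1)
# with s = exp(logsigma) and s_p = exp(prior_logsigma).
import numpy as np

m, logs = 1.0, np.log(0.5)          # q = N(1, 0.5)
m_p, logs_p = 0.0, np.log(1.0)      # p = N(0, 1)
kl = 0.5 * (logs_p - logs + (np.exp(logs) + (m - m_p) ** 2) / np.exp(logs_p) - 1.0)
print(kl)                           # ~0.597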
def get_marginal_likelihood(yt, mean_yt, xt, s, alpha, beta, eta_mu, eta_sigma, eps, sigma_px, epsilon=1e-8):
    yt_expand = tf.expand_dims(yt, 0)
    mean_yt = tf.reshape(mean_yt, [s, FLAGS.batch_size, 784])
    xt = tf.reshape(xt, [1, s, FLAGS.batch_size, FLAGS.hidden_size])
    # p_ygivenx = tf.reduce_prod(tf.pow(mean_yt, yt_expand) * tf.pow(1 - mean_yt, 1 - yt_expand), axis=2)
    v = alpha / (alpha + beta)
    pi = tf.concat(0, [v, [1.0]]) * tf.concat(0, [[1.0], tf.cumprod(1 - v)])
    p_x = gaussian_mixture_pdf(eta_mu, tf.square(eta_sigma) + tf.square(sigma_px), xt, pi)
    log_p_y_s = tf.reduce_sum(yt_expand * tf.log(mean_yt + epsilon) \
                    + (1.0 - yt_expand) * tf.log(1.0 - mean_yt + epsilon), 2) \
                + tf.log(p_x) \
                + 0.5 * tf.reduce_sum(tf.square(eps), 2)
    log_p_y_s_max = tf.reduce_max(log_p_y_s, reduction_indices=0)
    log_p_y = tf.log(tf.reduce_mean(tf.exp(log_p_y_s - log_p_y_s_max), 0)) + log_p_y_s_max
    return tf.reduce_mean(log_p_y)
# Taken from: https://github.com/tensorflow/tensorflow/issues/6322
def noisy_dense(inputs, units, bias_shape, c_names, w_i, b_i=None, activation=tf.nn.relu, noisy_distribution='factorised'):
    def f(e_list):
        return tf.multiply(tf.sign(e_list), tf.pow(tf.abs(e_list), 0.5))

    # tf.layers / tf.contrib.layers also provide flatten and dense, e.g.:
    # dense1 = tf.layers.dense(tf.contrib.layers.flatten(relu5), activation=tf.nn.relu, units=50)
    if not isinstance(inputs, ops.Tensor):
        inputs = ops.convert_to_tensor(inputs, dtype='float')
    # dim_list = inputs.get_shape().as_list()
    # flatten_shape = dim_list[1] if len(dim_list) <= 2 else reduce(lambda x, y: x * y, dim_list[1:])
    # reshaped = tf.reshape(inputs, [dim_list[0], flatten_shape])
    if len(inputs.shape) > 2:
        inputs = tf.contrib.layers.flatten(inputs)
    flatten_shape = inputs.shape[1]
    weights = tf.get_variable('weights', shape=[flatten_shape, units], initializer=w_i)
    w_noise = tf.get_variable('w_noise', [flatten_shape, units], initializer=w_i, collections=c_names)
    if noisy_distribution == 'independent':
        weights += tf.multiply(tf.random_normal(shape=w_noise.shape), w_noise)
    elif noisy_distribution == 'factorised':
        noise_1 = f(tf.random_normal(tf.TensorShape([flatten_shape, 1]), dtype=tf.float32))
        noise_2 = f(tf.random_normal(tf.TensorShape([1, units]), dtype=tf.float32))
        weights += tf.multiply(noise_1 * noise_2, w_noise)
    dense = tf.matmul(inputs, weights)
    if bias_shape is not None:
        assert bias_shape[0] == units
        biases = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        b_noise = tf.get_variable('b_noise', [1, units], initializer=b_i, collections=c_names)
        if noisy_distribution == 'independent':
            biases += tf.multiply(tf.random_normal(shape=b_noise.shape), b_noise)
        elif noisy_distribution == 'factorised':
            biases += tf.multiply(noise_2, b_noise)
        return activation(dense + biases) if activation is not None else dense + biases
    return activation(dense) if activation is not None else dense
# If no bias shape is given, the activation is applied directly to the dense output.
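# Usage sketch (not from the original source): noisy_dense uses
# tf.get_variable, so each call needs its own variable scope. The scope name,
# collection list and initializers are illustrative assumptions, and the usual
# imports of the original file (tf, ops) are assumed to be in place.
import tensorflow as tf

state = tf.placeholder(tf.float32, [None, 8])
c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
w_init = tf.random_normal_initializer(0.0, 0.1)
b_init = tf.constant_initializer(0.1)

with tf.variable_scope('noisy_fc1'):
    out = noisy_dense(state, units=32, bias_shape=[32], c_names=c_names,
                      w_i=w_init, b_i=b_init, noisy_distribution='factorised')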
def gelu_fast(_x):
    return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
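# For comparison (not part of the original file, same tf import assumed): the
# exact GELU uses the Gaussian CDF via the error function; gelu_fast above is
# its tanh approximation.
def gelu_exact(_x):
    return 0.5 * _x * (1.0 + tf.erf(_x / tf.sqrt(2.0)))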
def _polynomial(tensor):
    size = int(tensor.get_shape()[1])
    pows = [tf.pow(tensor[:, n], n + 1) for n in range(size)]
    return tf.transpose(tf.pack(pows))
def GumbelSoftmaxLogDensity(y, p, tau):
    # EPS = tf.constant(1e-10)
    k = tf.shape(y)[-1]
    k = tf.cast(k, tf.float32)
    # y = y + EPS
    # y = tf.divide(y, tf.reduce_sum(y, -1, keep_dims=True))
    y = normalize_to_unit_sum(y)
    sum_p_over_y = tf.reduce_sum(tf.divide(p, tf.pow(y, tau)), -1)
    logp = tf.lgamma(k)
    logp = logp + (k - 1) * tf.log(tau)
    logp = logp - k * tf.log(sum_p_over_y)
    logp = logp + sum_p_over_y
    return logp
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    in_box_diff = bbox_inside_weights * box_diff
    abs_in_box_diff = tf.abs(in_box_diff)
    smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
    in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
                  + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    out_loss_box = bbox_outside_weights * in_loss_box
    loss_box = tf.reduce_mean(tf.reduce_sum(
        out_loss_box,
        axis=dim
    ))
    return loss_box
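# Reference sketch (not from the original code): the per-element smooth-L1
# term above is 0.5 * sigma^2 * d^2 when |d| < 1/sigma^2, and |d| - 0.5/sigma^2
# otherwise. A quick numeric check with sigma = 1:
import numpy as np

def smooth_l1(d, sigma=1.0):
    s2 = sigma ** 2
    return np.where(np.abs(d) < 1.0 / s2,
                    0.5 * s2 * d ** 2,
                    np.abs(d) - 0.5 / s2)

print(smooth_l1(np.array([-2.0, -0.5, 0.5, 2.0])))   # [1.5, 0.125, 0.125, 1.5]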
def perplexity(label, logit):
    words = tf.cast(tf.size(label), tf.float32)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logit)
    cross_entropy = tf.divide(tf.reduce_sum(cross_entropy), words)
    perplex = tf.pow(2.0, cross_entropy)
    return perplex
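# Note (assumption about intent, same tf import assumed):
# tf.nn.sparse_softmax_cross_entropy_with_logits returns cross-entropy in
# nats, so the conventional perplexity is exp of the mean; the 2**x form above
# matches it only if the cross-entropy is first converted to bits.
def perplexity_nats(label, logit):
    words = tf.cast(tf.size(label), tf.float32)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logit)
    return tf.exp(tf.reduce_sum(cross_entropy) / words)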
def weighted_binary_crossentropy(feature_weights):
    def loss(y_true, y_pred):
        # try:
        #     x = K.binary_crossentropy(y_pred, y_true)
        #     # y = tf.Variable(feature_weights.astype('float32'))
        #     # z = K.dot(x, y)
        #     y_true = tf.pow(y_true + 1e-5, .75)
        #     y2 = tf.div(y_true, tf.reshape(K.sum(y_true, 1), [-1, 1]))
        #     z = K.sum(tf.mul(x, y2), 1)
        # except Exception as e:
        #     print e
        #     import pdb; pdb.set_trace()
        # return z
        return K.dot(K.binary_crossentropy(y_pred, y_true), K.variable(feature_weights.astype('float32')))

    return loss
def meanShift(n_updates=-1):
    X1 = tf.expand_dims(tf.transpose(input_X), 0)
    X2 = tf.expand_dims(input_X, 0)
    C = init_C
    sbs_C = tf.TensorArray(dtype=tf.float32, size=10000, infer_shape=False)
    sbs_C = sbs_C.write(0, init_C)

    def _mean_shift_step(C):
        C = tf.expand_dims(C, 2)
        Y = tf.reduce_sum(tf.pow((C - X1) / window_radius, 2), axis=1)
        gY = tf.exp(-Y)
        num = tf.reduce_sum(tf.expand_dims(gY, 2) * X2, axis=1)
        denom = tf.reduce_sum(gY, axis=1, keep_dims=True)
        C = num / denom
        return C

    if n_updates > 0:
        for i in range(n_updates):
            C = _mean_shift_step(C)
            sbs_C = sbs_C.write(i + 1, C)
    else:
        def _mean_shift(i, C, sbs_C, max_diff):
            new_C = _mean_shift_step(C)
            max_diff = tf.reshape(tf.reduce_max(tf.sqrt(tf.reduce_sum(tf.pow(new_C - C, 2), axis=1))), [])
            sbs_C = sbs_C.write(i + 1, new_C)
            return i + 1, new_C, sbs_C, max_diff

        def _cond(i, C, sbs_C, max_diff):
            return max_diff > 1e-5

        n_updates, C, sbs_C, _ = tf.while_loop(cond=_cond,
                                               body=_mean_shift,
                                               loop_vars=(tf.constant(0), C, sbs_C, tf.constant(1e10)))
        n_updates = tf.Print(n_updates, [n_updates])

    return C, sbs_C.gather(tf.range(n_updates + 1))
def __get_grad_noise_scale(self, gradients):
    if self.cfg.grad_noise_decay is None:
        grad_noise_scale = self.cfg.grad_noise_scale
    elif self.cfg.grad_noise_decay == 'annealing':
        """
        Adds annealed gaussian noise to the gradients at
        every time step, by decaying the variance at each
        time step

        g_t <- g_t + N(0, sigma_t^2)
        sigma_t^2 = eta / (1 + t)^gamma

        with eta selected from {0.01, 0.3, 1.0} and
        gamma = 0.55

        See: "Adding gradient noise improves learning
        for very deep networks",
        http://arxiv.org/pdf/1511.06807v1.pdf
        """
        eta = self.cfg.grad_noise_scale ** 0.5
        gamma = 0.55 / 2
        grad_noise_scale = eta * tf.pow(tf.cast(
            self.global_step + 1, self.cfg._FLOATX), -gamma)
    elif self.cfg.grad_noise_decay == 'neural_gpu':
        if self.prev_err is None:
            grad_noise_scale = self.cfg.grad_noise_scale
        else:
            eta = self.cfg.grad_noise_scale
            gamma = 0.55
            grad_noise_scale = eta * tf.sqrt(
                self.prev_err * tf.pow(tf.cast(
                    self.global_step + 1, self.cfg._FLOATX), -gamma))
    else:
        raise NotImplementedError('Unknown value of '
                                  'cfg.grad_noise_decay: %s' %
                                  self.cfg.grad_noise_decay)
    return grad_noise_scale
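# Usage sketch (not from the original class; the loss, optimizer and decay
# constants are illustrative). The scale produced above is a standard
# deviation, so the annealed Gaussian noise is added to each gradient before
# the apply-gradients step.
import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
w = tf.Variable([1.0, -2.0])
loss = tf.reduce_sum(tf.square(w))

eta, gamma = 0.01 ** 0.5, 0.55 / 2          # as in the 'annealing' branch above
grad_noise_scale = eta * tf.pow(tf.cast(global_step + 1, tf.float32), -gamma)

optimizer = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = optimizer.compute_gradients(loss)
noisy = [(g + tf.random_normal(tf.shape(g), stddev=grad_noise_scale), v)
         for g, v in grads_and_vars if g is not None]
train_op = optimizer.apply_gradients(noisy, global_step=global_step)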
def pow(x, a):
    '''Element-wise exponentiation.'''
    return tf.pow(x, a)