def _tf_safe_reciprocal(x):
return tf.reciprocal(x + tf.cast(tf.equal(x, 0), x.dtype))
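The helper above guards against division by zero: wherever x equals 0, the cast of tf.equal(x, 0) contributes 1, so the reciprocal is taken of 1 rather than 0. A minimal usage sketch (assuming the TensorFlow 1.x API, where tf.reciprocal and tf.Session are available):

import tensorflow as tf

x = tf.constant([0.0, 0.5, 2.0])
safe = _tf_safe_reciprocal(x)  # zeros map to 1.0 instead of inf
with tf.Session() as sess:
    print(sess.run(safe))  # [1.0, 2.0, 0.5]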
Example source code for Python reciprocal()
def test_basic(self):
with tf.Graph().as_default(), self.test_session() as sess:
rnd = np.random.RandomState(0)
x = self.get_random_tensor([18, 12], rnd=rnd)
y = tf.reciprocal(x)
self.assert_bw_fw(sess, x, y, rnd=rnd)
def _apply_dropout_mask(tensor_shape, keep_prob=1.0, normalize=True):
random_tensor = keep_prob + tf.random_uniform(tensor_shape, dtype=tf.float32)
binary_mask = tf.floor(random_tensor)
if normalize:
binary_mask = tf.reciprocal(keep_prob) * binary_mask
return binary_mask
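_apply_dropout_mask draws a Bernoulli(keep_prob) mask by flooring keep_prob + Uniform[0, 1) noise; with normalize=True the surviving entries are scaled by 1/keep_prob (inverted dropout) so the expected activation is unchanged. A sketch of how the mask might be applied, assuming TensorFlow 1.x (the tensor names are illustrative):

import tensorflow as tf

inputs = tf.random_normal([32, 128])
mask = _apply_dropout_mask(tf.shape(inputs), keep_prob=0.8, normalize=True)
dropped = inputs * mask  # roughly 20% of entries zeroed, the rest scaled by 1/0.8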
def testCplxReciprocalGPU(self):
shapes = [(5,4,3), (5,4), (5,), (1,)]
for sh in shapes:
x = ((np.random.randn(*sh) +
1j*np.random.randn(*sh)).astype(np.complex64))
self._compareGpu(x, np.reciprocal, tf.reciprocal)
def testCplxReciprocalGradGPU(self):
shapes = [(5,4,3), (5,4), (5,), (1,)]
for sh in shapes:
x = ((np.random.randn(*sh) +
1j*np.random.randn(*sh)).astype(np.complex64))
self._compareGpuGrad(x, np.reciprocal, tf.reciprocal)
def _gaussian_pdf(self, x, mixings, sigma, mean):
""" Wrapper for Gaussian PDF """
variance = tf.square(sigma)
output_size = tf.cast(tf.shape(mean)[1], tf.float32)
# Left: normalization term 1 / ((2*pi)^(D/2) * sigma^D), shape [N, K]
left = tf.reciprocal(tf.pow(2*np.pi, output_size/2.0) *
tf.pow(sigma, output_size))
# Exponent: e^[-(x-mu)^2/(2var)]. [N, K]
right = tf.exp(-tf.divide(tf.square(x - mean), 2 * variance))
return tf.multiply(left, right)
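The `left` factor above is the normalization constant of an isotropic D-dimensional Gaussian, 1 / ((2*pi)^(D/2) * sigma^D), with D taken from the width of `mean`. A quick NumPy cross-check of that constant (illustrative values only):

import numpy as np

sigma, D = 0.7, 3
left = 1.0 / (np.power(2 * np.pi, D / 2.0) * np.power(sigma, D))
# equals the density of N(0, sigma^2 * I_D) at its mean, e.g.
# scipy.stats.multivariate_normal(np.zeros(D), sigma**2 * np.eye(D)).pdf(np.zeros(D))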
def __init__(self, input_layer, input_layer_size, num_classes, scope=None, **kwargs):
if not hasattr(num_classes, "__len__"):
num_classes = (num_classes, )
# All splits are done via half-spaces, so there are always 2^k-1 output
# nodes. We handle non-power-of-two nodes by keeping track of the buffer
# sizes vs. the actual multinomial dimensions.
self._num_classes = num_classes
self._dim_sizes = [2**(int(np.ceil(np.log2(c)))) for c in num_classes]
self._num_nodes = np.prod(self._dim_sizes) - 1 # flatten the density into a 1-d grid
self._split_labels, self._split_masks = self.multinomial_split_masks()
with tf.variable_scope(scope or type(self).__name__):
self._labels = tf.placeholder(tf.float32, shape=[None, np.prod(self._num_classes)])
W = weight_variable([input_layer_size, self._num_nodes])
b = bias_variable([self._num_nodes])
split_indices = tf.to_int32(tf.argmax(self._labels, 1))
splits, z = tf.gather(self._split_labels, split_indices), tf.gather(self._split_masks, split_indices)
# q is the value of the tree nodes
# m is the value of the multinomial bins
self._q = tf.reciprocal(1 + tf.exp(-(tf.matmul(input_layer,W) + b)))
r = splits * tf.log(tf.clip_by_value(self._q, 1e-10, 1.0))
s = (1 - splits) * tf.log(tf.clip_by_value(1 - self._q, 1e-10, 1.0))
self._loss_function = tf.reduce_mean(-tf.reduce_sum(z * (r+s),
axis=[1]))
# Convert from multiscale output to multinomial output
L, R = self.multiscale_splits_masks()
q_tiles = tf.constant([1, np.prod(self._num_classes)])
m = tf.map_fn(lambda q_i: self.multiscale_to_multinomial(q_i, L, R, q_tiles), self._q)
# Reshape to the original dimensions of the density
density_shape = tf.stack([tf.shape(self._q)[0]] + list(self._num_classes))
self._density = tf.reshape(m, density_shape)
self._cross_entropy = tf.reduce_mean(-tf.reduce_sum(self._labels * tf.log(tf.clip_by_value(m, 1e-10, 1.0))
+ (1 - self._labels) * tf.log(tf.clip_by_value(1 - m, 1e-10, 1.0)),
axis=[1]))
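Several of these snippets write the node probabilities as tf.reciprocal(1 + tf.exp(-z)); that expression is simply the logistic function, i.e. tf.sigmoid(z). A small NumPy identity check (not part of the original code):

import numpy as np

z = np.linspace(-5.0, 5.0, 11)
assert np.allclose(1.0 / (1.0 + np.exp(-z)), 0.5 * (1.0 + np.tanh(z / 2.0)))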
def build(self, input_layer):
if self._one_hot:
split_indices = tf.to_int32(tf.argmax(self._labels, 1))
else:
split_indices = tf.to_int32(tf.reduce_sum([self._labels[:,i]*int(np.prod(self._num_classes[i+1:])) for i in xrange(len(self._num_classes))], 0))
self.splits, self.masks = tf.gather(self._split_labels, split_indices), tf.gather(self._split_masks, split_indices)
# q is the value of the tree nodes
# m is the value of the multinomial bins
# z is the log-space version of m
self._q = tf.reciprocal(1 + tf.exp(-(tf.matmul(input_layer,self.W) + self.b)))
r = self.splits * tf.log(tf.clip_by_value(self._q, 1e-10, 1.0))
s = (1 - self.splits) * tf.log(tf.clip_by_value(1 - self._q, 1e-10, 1.0))
self._multiscale_loss = tf.reduce_mean(-tf.reduce_sum(self.masks * (r+s),
axis=[1]))
# Convert from multiscale output to multinomial output
L, R = self.multiscale_splits_masks()
q_tiles = tf.constant([1, np.prod(self._num_classes)])
m = tf.map_fn(lambda q_i: self.multiscale_to_multinomial(q_i, L, R, q_tiles), self._q)
z = tf.log(tf.clip_by_value(m, 1e-10, 1.))
# Get the trend filtering penalty
fv = trend_filtering_penalty(z, self._num_classes, self._k, penalty=self._penalty)
reg = tf.multiply(self._lam, fv)
self._loss_function = tf.add(self._multiscale_loss, reg)
# Reshape to the original dimensions of the density
density_shape = tf.stack([tf.shape(self._q)[0]] + list(self._num_classes))
self._density = tf.reshape(m, density_shape)
def get_log_probs(self, indices, splits, dims):
'''Get the necessary nodes from the tree, calculate the log probs, and reshape appropriately'''
dim1size = int(np.prod(dims))
sampled_W = tf.transpose(tf.gather(self._W, indices), [0,2,1]) # [batchsize, inputlayersize, dim1size]
sampled_b = tf.gather(self._b, indices) # [batchsize, dim1size]
# input_layer is [batchsize, inputlayersize]
# sampled_W is [batchsize, inputlayersize, dim1size]
# sampled_q is [batchsize, dim1size] corresponding to q = X*W + b
sampled_q = tf.reshape(tf.matmul(tf.expand_dims(self._input_layer,1), sampled_W),
[-1, dim1size]) + sampled_b
sampled_probs = tf.reciprocal(1 + tf.exp(-sampled_q))
log_probs = tf.log(tf.clip_by_value(tf.where(splits > 0, sampled_probs, 1-sampled_probs), 1e-10, 1.0))
log_probs_dims = tf.reshape(log_probs, [-1] + dims)
return tf.reduce_sum(log_probs_dims, axis=[len(dims)])
def get_density_probs(self):
q = tf.matmul(self._input_layer, tf.transpose(self._W)) + self._b
probs = tf.reciprocal(1 + tf.exp(-q)) # [batchsize, num_nodes]
log_probs = tf.map_fn(lambda x: self._grid_log_probs(x), probs) # [batchsize, gridlen]
return tf.exp(log_probs) / tf.reduce_sum(tf.exp(log_probs), axis=range(1,len(self._num_classes)+1), keep_dims=True)
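The final line of get_density_probs exponentiates the grid of log probabilities and renormalizes over all grid axes, so each example's density sums to 1. The same operation in NumPy, for a hypothetical batch of 2-D grids:

import numpy as np

log_p = np.random.randn(4, 5, 5)            # [batch, grid dim 1, grid dim 2]
p = np.exp(log_p)
p /= p.sum(axis=(1, 2), keepdims=True)      # each 5x5 grid now sums to 1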
def univariate_gaussian_likelihood(x, mu, sigma):
result = tf.subtract(x, mu)
result = tf.multiply(result,tf.reciprocal(sigma))
result = -tf.square(result)/2.
return tf.multiply(tf.exp(result),tf.reciprocal(sigma))/np.sqrt(2*np.pi)
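univariate_gaussian_likelihood evaluates the normal density N(x | mu, sigma^2) = exp(-(x - mu)^2 / (2*sigma^2)) / (sigma * sqrt(2*pi)). A NumPy reference the TF graph above should agree with numerically (a sanity-check sketch, not part of the original code):

import numpy as np

def np_univariate_gaussian_likelihood(x, mu, sigma):
    return np.exp(-((x - mu) / sigma) ** 2 / 2.0) / (sigma * np.sqrt(2 * np.pi))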
def __init__(self, length, k, lam, neighbor_radius):
with tf.variable_scope(type(self).__name__):
self.length = length
# Trend filtering setup
self.k = k
self.neighbor_radius = neighbor_radius
self.neighborhood_size = 2 * self.neighbor_radius + 1
self.lam = lam * length / (self.neighborhood_size**2)
self.D = tf_get_delta(get_sparse_penalty_matrix((self.neighborhood_size,)), k) # Local patch to smooth
# Multiscale setup
self.bins = [np.arange(self.length)]
self.num_nodes = int(2**np.ceil(np.log2(self.length))) - 1
self.path_length = int(np.ceil(np.log2(length)))
# Binomial likelihoods loss function
self.q_indices = tf.placeholder(tf.int32, [None, self.path_length])
self.splits = tf.placeholder(tf.float32, [None, self.path_length])
self.q = tf.Variable([0.]*self.num_nodes)
self.sampled_q = tf.gather(self.q, self.q_indices)
self.sampled_probs = tf.reciprocal(1 + tf.exp(-self.sampled_q))
self.log_left_probs = self.splits * tf.log(tf.clip_by_value(self.sampled_probs, 1e-10, 1.0))
self.log_right_probs = (1 - self.splits) * tf.log(tf.clip_by_value(1 - self.sampled_probs, 1e-10, 1.0))
self.log_probs = tf.reduce_mean(-tf.reduce_sum(self.log_left_probs+self.log_right_probs, axis=[1]))
# Smooth a local patch centered on the target variables
self.neighborhood_indexes = tf.placeholder(tf.int32, [None, self.neighborhood_size, self.path_length])
self.neighborhood_splits = tf.placeholder(tf.float32, [None, self.neighborhood_size, self.path_length])
self.neighborhood_q = tf.gather(self.q, self.neighborhood_indexes)
self.neighborhood_probs = tf.reciprocal(1 + tf.exp(-self.neighborhood_q))
self.neighborhood_log_left = self.neighborhood_splits * tf.log(tf.clip_by_value(self.neighborhood_probs, 1e-10, 1.0))
self.neighborhood_log_right = (1 - self.neighborhood_splits) * tf.log(tf.clip_by_value(1 - self.neighborhood_probs, 1e-10, 1.0))
self.neighborhood_log_probs = tf.reduce_sum(self.neighborhood_log_left+self.neighborhood_log_right, axis=[2])
self.reg = tf.reduce_sum(tf.abs(batch_sparse_tensor_dense_matmul(self.D, tf.expand_dims(self.neighborhood_log_probs, -1))))
# Add the loss and regularization penalty together
self.loss = self.log_probs + self.lam * self.reg
self.sampled_density = tf.reduce_prod(tf.where(self.splits > 0, self.sampled_probs, 1 - self.sampled_probs), axis=[1])
def setUp(self):
super(CoreUnaryOpsTest, self).setUp()
self.ops = [
('abs', operator.abs, tf.abs, core.abs_function),
('neg', operator.neg, tf.neg, core.neg),
# TODO(shoyer): add unary + to core TensorFlow
('pos', None, None, None),
('sign', None, tf.sign, core.sign),
('reciprocal', None, tf.reciprocal, core.reciprocal),
('square', None, tf.square, core.square),
('round', None, tf.round, core.round_function),
('sqrt', None, tf.sqrt, core.sqrt),
('rsqrt', None, tf.rsqrt, core.rsqrt),
('log', None, tf.log, core.log),
('exp', None, tf.exp, core.exp),
('ceil', None, tf.ceil, core.ceil),
('floor', None, tf.floor, core.floor),
('cos', None, tf.cos, core.cos),
('sin', None, tf.sin, core.sin),
('tan', None, tf.tan, core.tan),
('acos', None, tf.acos, core.acos),
('asin', None, tf.asin, core.asin),
('atan', None, tf.atan, core.atan),
('lgamma', None, tf.lgamma, core.lgamma),
('digamma', None, tf.digamma, core.digamma),
('erf', None, tf.erf, core.erf),
('erfc', None, tf.erfc, core.erfc),
]
total_size = np.prod([v.size for v in self.original_lt.axes.values()])
self.test_lt = core.LabeledTensor(
tf.cast(self.original_lt, tf.float32) / total_size,
self.original_lt.axes)
def test_recipr(self):
my_graph = ad.Recipr(self.my_w0)
tf_graph = tf.reciprocal(self.tf_w0)
wrt_vars = [self.my_w0]
tf_vars = [self.tf_w0]
utils.custom_test(self, my_graph, wrt_vars, tf_graph, tf_vars)
def mix_prediction(losses, lam=0., mean_typ='arithmetic', weight_typ='normal', sign=-1., sf=1e-3):
# losses is shape (# of discriminators x batch_size)
# output is scalar
tf.assert_non_negative(lam)
assert mean_typ in ['arithmetic','geometric','harmonic']
assert weight_typ in ['normal','log']
assert sign == 1. or sign == -1.
assert sf > 0.
if lam == 0.:
weights = tf.ones_like(losses)
else:
if weight_typ == 'log':
weights = tf.pow(losses, lam)
else:
weights = tf.exp(lam * losses)
if mean_typ == 'arithmetic':
loss = weighted_arithmetic(weights, losses)
elif mean_typ == 'geometric':
log_losses = tf.log(sign*losses)
loss = sign*tf.exp(weighted_arithmetic(weights, log_losses))
else:
mn = tf.reduce_min(losses) - sf
inv_losses = tf.reciprocal(losses-mn)
loss = mn + tf.reciprocal(weighted_arithmetic(weights, inv_losses))
return loss
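mix_prediction calls a weighted_arithmetic() helper that is not shown in this listing. A plausible definition consistent with how it is used above is a weighted arithmetic mean over all elements; this is an assumption, not the original implementation:

import tensorflow as tf

def weighted_arithmetic(weights, values):
    # hypothetical helper: scalar weighted arithmetic mean
    return tf.reduce_sum(weights * values) / tf.reduce_sum(weights)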
def gaussian_kl_div(mean_0, cov_0, mean_1, cov_1, dim):
""" computes KL divergences between two Gaussians with given parameters"""
mean_diff = mean_1 - mean_0
cov_1_inv = tf.reciprocal(cov_1)
log_cov_1_det = tf.reduce_sum(tf.log(cov_1), axis=[1])
log_cov_0_det = tf.reduce_sum(tf.log(cov_0), axis=[1])
log_term = log_cov_1_det - log_cov_0_det
trace_term = tf.reduce_sum(cov_1_inv * cov_0, axis=[1])
square_term = tf.reduce_sum(mean_diff * cov_1_inv * mean_diff, axis=[1])
kl_div = 0.5 * (trace_term + square_term - dim + log_term)
return kl_div
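gaussian_kl_div implements the closed-form KL divergence between two diagonal-covariance Gaussians, KL(N0 || N1) = 0.5 * (tr(S1^-1 S0) + (m1 - m0)^T S1^-1 (m1 - m0) - D + ln|S1| - ln|S0|), where cov_0 and cov_1 hold per-dimension variances. A NumPy reference for the diagonal case (a sanity-check sketch):

import numpy as np

def np_gaussian_kl_div(mean_0, cov_0, mean_1, cov_1):
    diff = mean_1 - mean_0
    dim = mean_0.shape[1]
    return 0.5 * (np.sum(cov_0 / cov_1, axis=1)
                  + np.sum(diff ** 2 / cov_1, axis=1)
                  - dim
                  + np.sum(np.log(cov_1), axis=1)
                  - np.sum(np.log(cov_0), axis=1))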
def get_mixture_coef( self, args, output ):
# returns the tf slices containing mdn dist params
# ie, eq 18 -> 23 of http://arxiv.org/abs/1308.0850
z = output
#get the remaining parameters
last = args.nroutputvars_raw - args.nrClassOutputVars
z_eos = z[ :, 0 ]
z_eos = tf.sigmoid( z_eos ) #eos: sigmoid, eq 18
z_eod = z[ :, 1 ]
z_eod = tf.sigmoid( z_eod ) #eod: sigmoid
z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr = tf.split( z[ :, 2:last ], 6, 1 ) #eq 20: mu1, mu2: no transformation required
# process output z's into MDN parameters
# softmax all the pi's:
max_pi = tf.reduce_max( z_pi, 1, keep_dims = True )
z_pi = tf.subtract( z_pi, max_pi ) #EdJ: subtract max pi for numerical stabilization
z_pi = tf.exp( z_pi ) #eq 19
normalize_pi = tf.reciprocal( tf.reduce_sum( z_pi, 1, keep_dims = True ) )
z_pi = tf.multiply( normalize_pi, z_pi ) #19
# exponentiate the sigmas and also make corr between -1 and 1.
z_sigma1 = tf.exp( z_sigma1 ) #eq 21
z_sigma2 = tf.exp( z_sigma2 )
z_corr_tanh = tf.tanh( z_corr ) #eq 22
z_corr_tanh = .95 * z_corr_tanh #avoid -1 and 1
z_corr_tanh_adj = z_corr_tanh
return [ z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr_tanh_adj, z_eos, z_eod ]
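The pi-processing above (subtract the row max, exponentiate, multiply by the reciprocal of the row sum) is a manually stabilized softmax over the mixture weights; under the TF 1.x API it should match tf.nn.softmax. A self-contained equivalence sketch with a stand-in tensor:

import tensorflow as tf

logits = tf.random_normal([4, 10])                                   # stand-in for z_pi
manual = tf.exp(logits - tf.reduce_max(logits, 1, keep_dims=True))
manual = manual * tf.reciprocal(tf.reduce_sum(manual, 1, keep_dims=True))
builtin = tf.nn.softmax(logits)                                      # same values up to float error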
def _batch_norm(self, name, x):
"""Batch normalization."""
with tf.variable_scope(name):
params_shape = [x.get_shape()[-1]]
beta = tf.get_variable(
'beta', params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32),
trainable=False)
gamma = tf.get_variable(
'gamma', params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
factor = tf.get_variable(
'factor', 1, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
if self.bn:
mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
moving_mean = tf.get_variable(
'mean', params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32),
trainable=False)
moving_variance = tf.get_variable(
'variance', params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
self._extra_train_ops.append(moving_averages.assign_moving_average(
moving_mean, mean, 0.9))
self._extra_train_ops.append(moving_averages.assign_moving_average(
moving_variance, variance, 0.9))
else:
mean = tf.get_variable(
'mean', params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32),
trainable=False)
variance = tf.get_variable(
'variance', params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
# inv_factor = tf.reciprocal(factor)
inv_factor = tf.div(1., factor)
mean = tf.multiply(inv_factor, mean)
variance = tf.multiply(inv_factor, variance)
# tf.summary.histogram(mean.op.name, mean)
# tf.summary.histogram(variance.op.name, variance)
# epsilon used to be 1e-5. Maybe 0.001 solves the NaN problem in deeper nets.
y = tf.nn.batch_normalization(
x, mean, variance, beta, gamma, 0.001)
y.set_shape(x.get_shape())
return y