def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
    with tf.name_scope("loss_mix2"):
        float_labels = tf.cast(labels, tf.float32)
        float_encoders = float_labels
        for i in range(FLAGS.encoder_layers):
            var_i = np.loadtxt(FLAGS.autoencoder_dir + 'autoencoder_layer%d.model' % i)
            weight_i = tf.constant(var_i[:-1, :], dtype=tf.float32)
            bias_i = tf.reshape(tf.constant(var_i[-1, :], dtype=tf.float32), [-1])
            float_encoders = tf.nn.xw_plus_b(float_encoders, weight_i, bias_i)
            if i < FLAGS.encoder_layers - 1:
                float_encoders = tf.nn.relu(float_encoders)
            else:
                hidden_mean = tf.reduce_mean(float_encoders, axis=1, keep_dims=True)
                hidden_std = tf.sqrt(tf.reduce_mean(tf.square(float_encoders - hidden_mean), axis=1, keep_dims=True))
                float_encoders = (float_encoders - hidden_mean) / (hidden_std + 1e-6)
                #float_encoders = tf.nn.sigmoid(float_encoders)
        cross_entropy_encoder = 0.1 * self.calculate_mseloss(predictions_encoder, float_encoders)
        cross_entropy_loss = self.calculate_loss(predictions, labels)
        return cross_entropy_encoder + cross_entropy_loss, float_encoders
        #return cross_entropy_encoder, float_encoders
def build_model(self):
    self.q = tf.placeholder(tf.float32, [self.reader.vocab_size], name="question")
    self.a = tf.placeholder(tf.float32, [self.reader.vocab_size], name="answer")

    self.build_encoder()
    self.build_decoder()

    # Kullback-Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))
    # Log likelihood
    self.g_loss = tf.reduce_sum(tf.log(self.p_x_i))

    self.loss = tf.reduce_mean(self.e_loss + self.g_loss)
    self.optim = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(-self.loss)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("decoder loss", self.g_loss)
    _ = tf.scalar_summary("loss", self.loss)
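# A quick standalone NumPy check (not part of the model) of the closed-form
# KL term used above: it vanishes when the posterior equals the N(0, I) prior
# and grows as the means move away from zero.
import numpy as np

def kl_to_standard_normal(mu, log_sigma_sq):
    return -0.5 * np.sum(1 + log_sigma_sq - np.square(mu) - np.exp(log_sigma_sq))

print(kl_to_standard_normal(np.zeros(4), np.zeros(4)))  # 0.0: posterior == prior
print(kl_to_standard_normal(np.ones(4), np.zeros(4)))   # 2.0: KL grows with |mu|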
def log_variable(variable, gradient=None):
    r'''
    We introduce a function for logging a tensor variable's current state.
    It logs scalar values for the mean, standard deviation, minimum and maximum.
    Furthermore it logs a histogram of its state and (if given) of an optimization gradient.
    '''
    name = variable.name
    mean = tf.reduce_mean(variable)
    tf.summary.scalar(name='%s/mean' % name, tensor=mean)
    tf.summary.scalar(name='%s/stddev' % name, tensor=tf.sqrt(tf.reduce_mean(tf.square(variable - mean))))
    tf.summary.scalar(name='%s/max' % name, tensor=tf.reduce_max(variable))
    tf.summary.scalar(name='%s/min' % name, tensor=tf.reduce_min(variable))
    tf.summary.histogram(name=name, values=variable)
    if gradient is not None:
        if isinstance(gradient, tf.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient
        if grad_values is not None:
            tf.summary.histogram(name='%s/gradients' % name, values=grad_values)
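# A minimal usage sketch for log_variable; the variable, loss, and optimizer
# below are illustrative, not part of the original code.
W = tf.get_variable('W', shape=[128, 10])
loss = tf.reduce_sum(tf.square(W))
grads_and_vars = tf.train.GradientDescentOptimizer(0.1).compute_gradients(loss, [W])
for grad, var in grads_and_vars:
    log_variable(var, gradient=grad)
merged = tf.summary.merge_all()  # evaluate and write with a tf.summary.FileWriter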
def batchnorm(x, name, phase, updates, gamma=0.96):
    # `mean` and `switch` are helpers defined elsewhere in the source module.
    k = x.get_shape()[1]
    runningmean = tf.get_variable(name + "/mean", shape=[1, k], initializer=tf.constant_initializer(0.0), trainable=False)
    runningvar = tf.get_variable(name + "/var", shape=[1, k], initializer=tf.constant_initializer(1e-4), trainable=False)
    testy = (x - runningmean) / tf.sqrt(runningvar)
    mean_ = mean(x, axis=0, keepdims=True)
    var_ = mean(tf.square(x), axis=0, keepdims=True)  # second moment E[x^2], not the centered variance
    std = tf.sqrt(var_)
    trainy = (x - mean_) / std
    updates.extend([
        tf.assign(runningmean, runningmean * gamma + mean_ * (1 - gamma)),
        tf.assign(runningvar, runningvar * gamma + var_ * (1 - gamma))
    ])
    y = switch(phase, trainy, testy)
    out = y * tf.get_variable(name + "/scaling", shape=[1, k], initializer=tf.constant_initializer(1.0), trainable=True) \
        + tf.get_variable(name + "/translation", shape=[1, k], initializer=tf.constant_initializer(0.0), trainable=True)
    return out
# ================================================================
# Mathematical utils
# ================================================================
def huber_loss(x, delta=1.0):
    """Reference: https://en.wikipedia.org/wiki/Huber_loss"""
    return tf.where(
        tf.abs(x) < delta,
        tf.square(x) * 0.5,
        delta * (tf.abs(x) - 0.5 * delta)
    )
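# Sanity check of the piecewise definition (a standalone sketch): quadratic
# inside |x| < delta, linear outside.
x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
with tf.Session() as sess:
    print(sess.run(huber_loss(x)))  # [1.5, 0.125, 0.0, 0.125, 1.5]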
# ================================================================
# Basic Stuff
# ================================================================
# ================================================================
# Theano-like Function
# ================================================================
# ================================================================
# Optimizer utils
# ================================================================
def batchnormalize(X, eps=1e-8, g=None, b=None):
    if X.get_shape().ndims == 4:
        # NHWC: normalize over batch and spatial dimensions, per channel.
        mean = tf.reduce_mean(X, [0, 1, 2])
        std = tf.reduce_mean(tf.square(X - mean), [0, 1, 2])  # variance; sqrt applied below
        X = (X - mean) / tf.sqrt(std + eps)

        if g is not None and b is not None:
            g = tf.reshape(g, [1, 1, 1, -1])
            b = tf.reshape(b, [1, 1, 1, -1])
            X = X * g + b

    elif X.get_shape().ndims == 2:
        # Fully connected: normalize over the batch dimension, per feature.
        mean = tf.reduce_mean(X, 0)
        std = tf.reduce_mean(tf.square(X - mean), 0)  # variance; sqrt applied below
        X = (X - mean) / tf.sqrt(std + eps)

        if g is not None and b is not None:
            g = tf.reshape(g, [1, -1])
            b = tf.reshape(b, [1, -1])
            X = X * g + b

    else:
        raise NotImplementedError

    return X
def Grad_Penalty(real_data, fake_data, Discriminator, config):
    '''
    Implementation from "Improved Training of Wasserstein GANs".
    Interpolation-based estimation of the gradient of the discriminator,
    used to penalize the gradient norm rather than explicitly constrain
    the Lipschitz constant.
    '''
    batch_size = config.batch_size
    LAMBDA = config.lambda_W
    n_hidden = config.critic_hidden_size
    alpha = tf.random_uniform([batch_size, 1], 0., 1.)
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)  # could do more if not fixed batch_size
    disc_interpolates = Discriminator(interpolates, batch_size, n_hidden=n_hidden, config=config, reuse=True)[1]  # logits
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients),
                                   reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)
    grad_cost = LAMBDA * gradient_penalty
    return grad_cost, slopes
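# The same penalty pattern in isolation, with a toy linear critic standing in
# for Discriminator; everything below is illustrative, not the repo's setup.
real = tf.random_normal([16, 8])
fake = tf.random_normal([16, 8])
w = tf.get_variable('critic_w', [8, 1])

alpha = tf.random_uniform([16, 1], 0., 1.)
interp = alpha * real + (1 - alpha) * fake
logits = tf.matmul(interp, w)  # toy critic output
grads = tf.gradients(logits, [interp])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=1))
penalty = 10.0 * tf.reduce_mean((slopes - 1.) ** 2)  # LAMBDA = 10 is the paper's default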
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    # Note: `df_dim` and `batchsize` are module-level globals in the source repo,
    # and tf.mul/tf.sub/tf.concat(dim, values) are pre-TF-1.0 spellings.
    num_inputs = df_dim * 4
    theta = tf.get_variable(name + "/theta", [num_inputs, num_kernels, dim_per_kernel], initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name + "/lws", [num_kernels, dim_per_kernel], initializer=tf.constant_initializer(0.0))
    W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale) / tf.sqrt(tf.reduce_sum(tf.square(theta), 0)), 0))
    W = tf.reshape(W, [-1, num_kernels * dim_per_kernel])
    x = input
    x = tf.reshape(x, [batchsize, num_inputs])
    activation = tf.matmul(x, W)
    activation = tf.reshape(activation, [-1, num_kernels, dim_per_kernel])
    abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation, 3), tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0))), 2),
                     1 - tf.expand_dims(tf.constant(np.eye(batchsize), dtype=np.float32), 1))
    f = tf.reduce_sum(tf.exp(-abs_dif), 2) / tf.reduce_sum(tf.exp(-abs_dif))
    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1, [x, f])
def layer_norm_all(h, base, num_units, scope):
    # Layer norm (faster version).
    #
    # Performs layer norm on multiple bases at once (i.e., i, g, j, o for an LSTM)
    # by reshaping h so the normalizations run in parallel.
    with tf.variable_scope(scope):
        h_reshape = tf.reshape(h, [-1, base, num_units])
        mean = tf.reduce_mean(h_reshape, [2], keep_dims=True)
        var = tf.reduce_mean(tf.square(h_reshape - mean), [2], keep_dims=True)
        epsilon = tf.constant(1e-3)
        rstd = tf.rsqrt(var + epsilon)
        h_reshape = (h_reshape - mean) * rstd
        # reshape back to original
        h = tf.reshape(h_reshape, [-1, base * num_units])
        alpha = tf.get_variable('layer_norm_alpha', [4 * num_units],
                                initializer=tf.constant_initializer(1.0), dtype=tf.float32)
        bias = tf.get_variable('layer_norm_bias', [4 * num_units],
                               initializer=tf.constant_initializer(0.0), dtype=tf.float32)
        return (h * alpha) + bias
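# Usage sketch (shapes are illustrative): normalize the four concatenated
# LSTM gate pre-activations in parallel.
h = tf.random_normal([8, 4 * 16])  # batch of concatenated i, g, j, o gates
out = layer_norm_all(h, base=4, num_units=16, scope='ln')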
def shrink_bgest(r, rvar, theta):
    """Bernoulli-Gaussian MMSE estimator.
    Performs MMSE estimation E[x|r]
    for x ~ BernoulliGaussian(lambda, xvar1),
        r|x ~ Normal(x, rvar).
    The parameters theta[0], theta[1] represent
        the variance of nonzero x[i]:     xvar1 = abs(theta[0])
        the probability of nonzero x[i]:  lambda = 1/(exp(theta[1])+1)
    """
    xvar1 = abs(theta[..., 0])
    loglam = theta[..., 1]  # log(1/lambda - 1)
    beta = 1 / (1 + rvar / xvar1)
    r2scale = r * r * beta / rvar
    rho = tf.exp(loglam - .5 * r2scale) * tf.sqrt(1 + xvar1 / rvar)
    rho1 = rho + 1
    xhat = beta * r / rho1
    dxdr = beta * ((1 + rho * (1 + r2scale)) / tf.square(rho1))
    dxdr = tf.reduce_mean(dxdr, 0)
    return (xhat, dxdr)
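# Sanity check (standalone NumPy sketch): as lambda -> 1 (theta[1] -> -inf),
# rho -> 0 and the estimator reduces to plain LMMSE shrinkage xhat = beta * r.
import numpy as np
r, rvar, xvar1, loglam = 2.0, 0.5, 1.0, -50.0
beta = 1 / (1 + rvar / xvar1)
rho = np.exp(loglam - 0.5 * r * r * beta / rvar) * np.sqrt(1 + xvar1 / rvar)
print(beta * r / (rho + 1), beta * r)  # both ~1.3333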
def shrink_spline(r, rvar, theta):
    """Spline-based shrinkage function."""
    scale = theta[0] * tf.sqrt(rvar)
    rs = tf.sign(r)
    ar = tf.abs(r / scale)
    ar2 = tf.square(ar)
    ar3 = ar * ar2
    reg1 = tf.to_float(ar < 1)
    reg2 = tf.to_float(ar < 2) - reg1
    ar_m2 = 2 - ar
    ar_m2_p2 = tf.square(ar_m2)
    ar_m2_p3 = ar_m2 * ar_m2_p2
    beta3 = ((2. / 3 - ar2 + .5 * ar3) * reg1 + (1. / 6 * ar_m2_p3) * reg2)
    xhat = r * (theta[1] + theta[2] * beta3)
    return (xhat, auto_gradients(xhat, r))
def __init__(self, preds, labels, model, num_nodes, pos_weight, norm):
    preds_sub = preds
    labels_sub = labels

    self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
    self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)  # Adam optimizer

    # Latent loss
    self.log_lik = self.cost
    self.kl = (0.5 / num_nodes) * tf.reduce_mean(tf.reduce_sum(1 + 2 * model.z_log_std - tf.square(model.z_mean) -
                                                               tf.square(tf.exp(model.z_log_std)), 1))
    self.cost -= self.kl

    self.opt_op = self.optimizer.minimize(self.cost)
    self.grads_vars = self.optimizer.compute_gradients(self.cost)

    self.correct_prediction = tf.equal(tf.cast(tf.greater_equal(tf.sigmoid(preds_sub), 0.5), tf.int32),
                                       tf.cast(labels_sub, tf.int32))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
def smooth_l1_loss(offsets, gt_offsets, scope=None):
    """
    Smooth L1 loss between offsets and encoded groundtruth.
    ARGS
        offsets: [m?, 5], predicted offsets for one example
        gt_offsets: [m?, 5], corresponding groundtruth offsets
    RETURN
        loss: [m?], smooth L1 loss summed over the 5 offset components
    """
    with tf.variable_scope(scope or 'smooth_l1_loss'):
        gt_offsets = tf.stop_gradient(gt_offsets)
        diff = tf.abs(offsets - gt_offsets)
        lesser_mask = tf.cast(tf.less(diff, 1.0), tf.float32)
        larger_mask = 1.0 - lesser_mask
        losses = (0.5 * tf.square(diff)) * lesser_mask + (diff - 0.5) * larger_mask
        return tf.reduce_sum(losses, 1)
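# Numeric sketch of the piecewise behavior (values are illustrative):
pred = tf.constant([[0.0, 2.0, 0.5, 0.0, 0.0]])
gt = tf.constant([[0.0, 0.0, 0.0, 0.0, 0.0]])
with tf.Session() as sess:
    print(sess.run(smooth_l1_loss(pred, gt)))
    # [1.625]: diff 2.0 -> 2.0 - 0.5 = 1.5 (linear branch); diff 0.5 -> 0.125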
def negative_l2_distance(x1, x2, axis=1):
    """
    Negative L2 Distance.
    .. math:: L = - \\sqrt{\\sum_i (x1_i - x2_i)^2}
    Args:
        x1: First term.
        x2: Second term.
        axis: Reduction Indices.
    Returns:
        Similarity Value.
    """
    distance = tf.sqrt(tf.reduce_sum(tf.square(x1 - x2), axis=axis))
    return -distance
def negative_square_l2_distance(x1, x2, axis=1):
    """
    Negative Square L2 Distance.
    .. math:: L = - \\sum_i (x1_i - x2_i)^2
    Args:
        x1: First term.
        x2: Second term.
        axis: Reduction Indices.
    Returns:
        Similarity Value.
    """
    distance = tf.reduce_sum(tf.square(x1 - x2), axis=axis)
    return -distance
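# Usage sketch for both similarity functions (a 3-4-5 triangle):
a = tf.constant([[0.0, 3.0, 4.0]])
b = tf.constant([[0.0, 0.0, 0.0]])
with tf.Session() as sess:
    print(sess.run(negative_l2_distance(a, b)))         # [-5.0]
    print(sess.run(negative_square_l2_distance(a, b)))  # [-25.0]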
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    ''' Adam optimizer '''
    updates = []
    if type(cost_or_grads) is not list:
        grads = tf.gradients(cost_or_grads, params)
    else:
        grads = cost_or_grads
    t = tf.Variable(1., 'adam_t')
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g
            v_hat = v_t / (1. - tf.pow(mom1, t))
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2, t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)
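# Minimal usage sketch: drive a single parameter toward the minimum of a
# quadratic loss with the hand-rolled Adam updates above.
w = tf.Variable([5.0])
loss = tf.reduce_sum(tf.square(w))  # minimum at w = 0
train_op = adam_updates([w], loss, lr=0.1)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(200):
        sess.run(train_op)
    print(sess.run(w))  # close to [0.]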
def add_param(self, spec, shape, name, **kwargs):
    param = self.add_param_plain(spec, shape, name, **kwargs)
    if name is not None and name.startswith("W") and self.weight_normalization:
        # Hacky: check if the parameter is a weight matrix. If so, apply weight normalization
        if len(param.get_shape()) == 2:
            v = param
            g = self.add_param_plain(tf.ones_initializer, (shape[1],), name=name + "_wn/g")
            param = v * (tf.reshape(g, (1, -1)) / tf.sqrt(tf.reduce_sum(tf.square(v), 0, keep_dims=True)))
        elif len(param.get_shape()) == 4:
            v = param
            g = self.add_param_plain(tf.ones_initializer, (shape[3],), name=name + "_wn/g")
            param = v * (tf.reshape(g, (1, 1, 1, -1)) / tf.sqrt(tf.reduce_sum(tf.square(v), [0, 1, 2],
                                                                              keep_dims=True)))
        else:
            raise NotImplementedError
    return param
def kl(self, old_dist_info, new_dist_info):
    """
    Compute the KL divergence of two multivariate Gaussian distributions with
    diagonal covariance matrices.
    """
    old_means = old_dist_info["mean"]
    old_log_stds = old_dist_info["log_std"]
    new_means = new_dist_info["mean"]
    new_log_stds = new_dist_info["log_std"]
    old_std = np.exp(old_log_stds)
    new_std = np.exp(new_log_stds)
    # means: (N*A)
    # std: (N*A)
    # formula:
    # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
    # ln(\sigma_2/\sigma_1)
    numerator = np.square(old_means - new_means) + \
        np.square(old_std) - np.square(new_std)
    denominator = 2 * np.square(new_std) + 1e-8
    return np.sum(
        numerator / denominator + new_log_stds - old_log_stds, axis=-1)
    # more lossy version:
    # return TT.sum(
    #     numerator / denominator + TT.log(new_std) - TT.log(old_std), axis=-1)
def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
    """
    Compute the KL divergence of two multivariate Gaussian distributions with
    diagonal covariance matrices (symbolic TensorFlow version of kl above).
    """
    old_means = old_dist_info_vars["mean"]
    old_log_stds = old_dist_info_vars["log_std"]
    new_means = new_dist_info_vars["mean"]
    new_log_stds = new_dist_info_vars["log_std"]
    old_std = tf.exp(old_log_stds)
    new_std = tf.exp(new_log_stds)
    # means: (N*A)
    # std: (N*A)
    # formula:
    # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
    # ln(\sigma_2/\sigma_1)
    numerator = tf.square(old_means - new_means) + \
        tf.square(old_std) - tf.square(new_std)
    denominator = 2 * tf.square(new_std) + 1e-8
    return tf.reduce_sum(
        numerator / denominator + new_log_stds - old_log_stds, reduction_indices=-1)
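# Standalone NumPy check of the same closed-form diagonal-Gaussian KL:
# zero for identical distributions, positive once the means differ.
import numpy as np

def diag_gauss_kl(mu1, log_std1, mu2, log_std2):
    std1, std2 = np.exp(log_std1), np.exp(log_std2)
    num = np.square(mu1 - mu2) + np.square(std1) - np.square(std2)
    den = 2 * np.square(std2) + 1e-8
    return np.sum(num / den + log_std2 - log_std1, axis=-1)

mu = np.array([[0.5, -1.0]]); ls = np.array([[0.0, 0.3]])
print(diag_gauss_kl(mu, ls, mu, ls))        # [0.]: KL(p || p) = 0
print(diag_gauss_kl(mu, ls, mu + 1.0, ls))  # > 0 once the means differ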
def test_sgld_sparse(self):
    tf.reset_default_graph()
    z = tf.Variable(tf.zeros((5, 2)), dtype=tf.float32)
    idx = tf.placeholder(tf.int32)
    zi = tf.gather(z, idx)
    zloss = tf.square(zi - [10.0, 5.0])
    sgld = SGLD(learning_rate=0.4)
    train_op_sgld = sgld.minimize(zloss)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    self.assertTrue(np.alltrue(sess.run(z) == 0.0))
    sess.run(train_op_sgld, feed_dict={idx: 3})
    zh = sess.run(z)
    self.assertTrue(np.alltrue(zh[[0, 1, 2, 4], :] == 0.0))
    self.assertTrue(zh[3, 0] > 0)
def test_psgld_sparse(self):
    tf.reset_default_graph()
    z = tf.Variable(tf.zeros((5, 2)), dtype=tf.float32)
    idx = tf.placeholder(tf.int32)
    zi = tf.gather(z, idx)
    zloss = tf.square(zi - [10.0, 5.0])
    psgld = pSGLD(learning_rate=0.4)
    train_op_psgld = psgld.minimize(zloss)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    self.assertTrue(np.alltrue(sess.run(z) == 0.0))
    sess.run(train_op_psgld, feed_dict={idx: 3})
    zh = sess.run(z)
    self.assertTrue(np.alltrue(zh[[0, 1, 2, 4], :] == 0.0))
    self.assertTrue(zh[3, 0] > 0)
def gradient_penalty(self):
    config = self.config
    gan = self.gan
    gradient_penalty = config.gradient_penalty
    if hasattr(gan.inputs, 'gradient_penalty_label'):
        x = gan.inputs.gradient_penalty_label
    else:
        x = gan.inputs.x
    generator = self.generator or gan.generator
    g = generator.sample
    discriminator = self.discriminator or gan.discriminator

    shape = [1 for t in g.get_shape()]
    shape[0] = gan.batch_size()
    uniform_noise = tf.random_uniform(shape=shape, minval=0., maxval=1.)
    print("[gradient penalty] applying x:", x, "g:", g, "noise:", uniform_noise)
    interpolates = x + uniform_noise * (g - x)
    reused_d = discriminator.reuse(interpolates)
    gradients = tf.gradients(reused_d, [interpolates])[0]
    penalty = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
    penalty = tf.reduce_mean(tf.square(penalty - 1.))
    return float(gradient_penalty) * penalty
def setup_param_noise(self, normalized_obs0):
    assert self.param_noise is not None

    # Configure perturbed actor.
    param_noise_actor = copy(self.actor)
    param_noise_actor.name = 'param_noise_actor'
    self.perturbed_actor_tf = param_noise_actor(normalized_obs0)
    logger.info('setting up param noise')
    self.perturb_policy_ops = get_perturbed_actor_updates(self.actor, param_noise_actor, self.param_noise_stddev)

    # Configure separate copy for stddev adaptation.
    adaptive_param_noise_actor = copy(self.actor)
    adaptive_param_noise_actor.name = 'adaptive_param_noise_actor'
    adaptive_actor_tf = adaptive_param_noise_actor(normalized_obs0)
    self.perturb_adaptive_policy_ops = get_perturbed_actor_updates(self.actor, adaptive_param_noise_actor, self.param_noise_stddev)
    self.adaptive_policy_distance = tf.sqrt(tf.reduce_mean(tf.square(self.actor_tf - adaptive_actor_tf)))
def setup_critic_optimizer(self):
    logger.info('setting up critic optimizer')
    normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms), self.return_range[0], self.return_range[1])
    self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
    if self.critic_l2_reg > 0.:
        critic_reg_vars = [var for var in self.critic.trainable_vars if 'kernel' in var.name and 'output' not in var.name]
        for var in critic_reg_vars:
            logger.info('  regularizing: {}'.format(var.name))
        logger.info('  applying l2 regularization with {}'.format(self.critic_l2_reg))
        critic_reg = tc.layers.apply_regularization(
            tc.layers.l2_regularizer(self.critic_l2_reg),
            weights_list=critic_reg_vars
        )
        self.critic_loss += critic_reg
    critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
    critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
    logger.info('  critic shapes: {}'.format(critic_shapes))
    logger.info('  critic params: {}'.format(critic_nb_params))
    self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars, clip_norm=self.clip_norm)
    self.critic_optimizer = MpiAdam(var_list=self.critic.trainable_vars,
                                    beta1=0.9, beta2=0.999, epsilon=1e-08)
def __init__(self, ob_dim, ac_dim):  # pylint: disable=W0613
    X = tf.placeholder(tf.float32, shape=[None, ob_dim * 2 + ac_dim * 2 + 2])  # batch of observations
    vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
    wd_dict = {}
    h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:, 0]
    sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
    wd_loss = tf.get_collection("vf_losses", None)
    loss = U.mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
    loss_sampled = U.mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
    self._predict = U.function([X], vpred_n)
    optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001 * (1 - 0.9), momentum=0.9,
                               clip_kl=0.3, epsilon=0.1, stats_decay=0.95,
                               async=1, kfac_update=2, cold_iter=50,  # NB: 'async' is reserved from Python 3.7 onward
                               weight_decay_dict=wd_dict, max_grad_norm=None)
    vf_var_list = []
    for var in tf.trainable_variables():
        if "vf" in var.name:
            vf_var_list.append(var)
    update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
    self.do_update = U.function([X, vtarg_n], update_op)  # pylint: disable=E1101
    U.initialize()  # Initialize uninitialized TF variables
def __init__(self, epsilon=1e-2, shape=()):
    self._sum = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(0.0),
        name="runningsum", trainable=False)
    self._sumsq = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(epsilon),
        name="runningsumsq", trainable=False)
    self._count = tf.get_variable(
        dtype=tf.float64,
        shape=(),
        initializer=tf.constant_initializer(epsilon),
        name="count", trainable=False)
    self.shape = shape

    self.mean = tf.to_float(self._sum / self._count)
    self.std = tf.sqrt(tf.maximum(tf.to_float(self._sumsq / self._count) - tf.square(self.mean), 1e-2))

    newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
    newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
    newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
    self.incfiltparams = U.function([newsum, newsumsq, newcount], [],
                                    updates=[tf.assign_add(self._sum, newsum),
                                             tf.assign_add(self._sumsq, newsumsq),
                                             tf.assign_add(self._count, newcount)])
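# Standalone NumPy analogue of the statistics tracked above (a sketch, not the
# class itself): accumulate sum, sum of squares, and count, then derive moments.
import numpy as np
epsilon = 1e-2
running_sum, running_sumsq, count = 0.0, epsilon, epsilon
for _ in range(5):
    batch = np.random.randn(100)
    running_sum += batch.sum()
    running_sumsq += np.square(batch).sum()
    count += len(batch)
mean = running_sum / count
std = np.sqrt(max(running_sumsq / count - mean ** 2, 1e-2))
print(mean, std)  # approaches (0, 1) for standard-normal batches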
def gauss_log_prob(mu, logstd, x):
    r""" Used for computing the log probability, following the formula for the
    multivariate Gaussian density.

    All the inputs should have shape (n,a). The `gp_na` contains component-wise
    probabilities, then the reduce_sum results in a tensor of size (n,) which
    contains the log probability for each of the n elements. (We later perform a
    mean on this.) Also, the 2*pi part needs 1/2, but doesn't need the sum over
    the number of components (# of actions) because of the reduce_sum here.
    Finally, logstd doesn't need a 1/2 constant because log(\sigma_i^2) will
    bring the 2 over.

    This formula generalizes for an arbitrary number of actions, BUT it assumes
    that the covariance matrix is diagonal.
    """
    var_na = tf.exp(2 * logstd)
    gp_na = -tf.square(x - mu) / (2 * var_na) - 0.5 * tf.log(tf.constant(2 * np.pi)) - logstd
    return tf.reduce_sum(gp_na, axis=[1])
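# Sanity check (standalone sketch, assumes SciPy is available): the
# per-component term matches scipy.stats.norm.logpdf.
import numpy as np
from scipy.stats import norm

mu, logstd, x = 0.5, np.log(2.0), 1.5
manual = -((x - mu) ** 2) / (2 * np.exp(2 * logstd)) - 0.5 * np.log(2 * np.pi) - logstd
print(manual, norm.logpdf(x, loc=mu, scale=np.exp(logstd)))  # identical values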
def gauss_KL(mu1, logstd1, mu2, logstd2):
    """ Returns KL divergence between two multivariate Gaussians, component-wise.

    It assumes the covariance matrix is diagonal. All inputs have shape (n,a).
    It is not necessary to know the number of actions because reduce_sum will
    sum over this to get the `d` constant offset. The part consisting of the
    trace in the formula is blended with the mean difference squared due to the
    common "denominator" of var2_na. This formula generalizes for an arbitrary
    number of actions. I think mu2 and logstd2 should represent the policy
    before the update.

    Returns the KL divergence for each of the n components in the minibatch,
    then we do a reduce_mean outside this.
    """
    var1_na = tf.exp(2. * logstd1)
    var2_na = tf.exp(2. * logstd2)

    tmp_matrix = 2. * (logstd2 - logstd1) + (var1_na + tf.square(mu1 - mu2)) / var2_na - 1
    kl_n = tf.reduce_sum(0.5 * tmp_matrix, axis=[1])  # Don't forget the 1/2 !!
    assert_op = tf.Assert(tf.reduce_all(kl_n >= -0.0000001), [kl_n])
    with tf.control_dependencies([assert_op]):
        kl_n = tf.identity(kl_n)
    return kl_n
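# Quick NumPy check of the closed form above: KL(p || p) = 0, and shifting
# the means makes it strictly positive.
import numpy as np
mu1 = np.array([[0.0, 1.0]]); logstd1 = np.array([[0.0, -0.5]])
var1 = np.exp(2 * logstd1)
tmp = 2. * (logstd1 - logstd1) + (var1 + (mu1 - mu1) ** 2) / var1 - 1
print(0.5 * tmp.sum(axis=1))  # [0.]
tmp = 2. * (logstd1 - logstd1) + (var1 + 1.0) / var1 - 1  # means shifted by 1
print(0.5 * tmp.sum(axis=1))  # [~1.86] > 0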