def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
    def leapfrog(pos, vel, step, i):
        de_dp_ = tf.gradients(tf.reduce_sum(energy_fn(pos)), pos)[0]
        new_vel_ = vel - step * de_dp_
        new_pos_ = pos + step * new_vel_
        return [new_pos_, new_vel_, step, tf.add(i, 1)]

    def condition(pos, vel, step, i):
        return tf.less(i, n_steps)

    # Leapfrog initialization: half step for the velocity, full step for the position.
    de_dp = tf.gradients(tf.reduce_sum(energy_fn(initial_pos)), initial_pos)[0]
    vel_half_step = initial_vel - 0.5 * stepsize * de_dp
    pos_full_step = initial_pos + stepsize * vel_half_step

    i = tf.constant(0)
    final_pos, new_vel, _, _ = tf.while_loop(
        condition, leapfrog, [pos_full_step, vel_half_step, stepsize, i])

    # Final half step for the velocity to complete the leapfrog scheme.
    de_dp = tf.gradients(tf.reduce_sum(energy_fn(final_pos)), final_pos)[0]
    final_vel = new_vel - 0.5 * stepsize * de_dp
    return final_pos, final_vel

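A minimal usage sketch, assuming a standard Gaussian energy function (the quadratic below is an assumption for illustration; any batched scalar energy works):

import tensorflow as tf

def gaussian_energy(pos):
    # E(x) = 0.5 * ||x||^2 per example, i.e. a standard normal target.
    return 0.5 * tf.reduce_sum(tf.square(pos), axis=1)

initial_pos = tf.placeholder(tf.float32, [None, 2])
initial_vel = tf.placeholder(tf.float32, [None, 2])
final_pos, final_vel = simulate_dynamics(initial_pos, initial_vel,
                                         stepsize=0.1, n_steps=10,
                                         energy_fn=gaussian_energy)
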
def __init__(self, channels=3, n_class=2, cost="cross_entropy", cost_kwargs={}, **kwargs):
    tf.reset_default_graph()

    self.n_class = n_class
    self.summaries = kwargs.get("summaries", True)

    self.x = tf.placeholder("float", shape=[None, None, None, channels])
    self.y = tf.placeholder("float", shape=[None, None, None, n_class])
    self.keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

    logits, self.variables, self.offset = create_conv_net(self.x, self.keep_prob, channels, n_class, **kwargs)

    self.cost = self._get_cost(logits, cost, cost_kwargs)

    self.gradients_node = tf.gradients(self.cost, self.variables)

    self.cross_entropy = tf.reduce_mean(cross_entropy(tf.reshape(self.y, [-1, n_class]),
                                                      tf.reshape(pixel_wise_softmax_2(logits), [-1, n_class])))

    self.predicter = pixel_wise_softmax_2(logits)
    self.correct_pred = tf.equal(tf.argmax(self.predicter, 3), tf.argmax(self.y, 3))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))

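A sketch of evaluating `gradients_node` for debugging, assuming the class above is named `Unet` and that `x_batch`/`y_batch` are appropriately shaped arrays (all three names are assumptions):

net = Unet(channels=1, n_class=2)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    grads = sess.run(net.gradients_node,
                     feed_dict={net.x: x_batch, net.y: y_batch, net.keep_prob: 1.})
    # one gradient array per variable in net.variables
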
def Grad_Penalty(real_data, fake_data, Discriminator, config):
    '''
    Implementation from "Improved Training of Wasserstein GANs".
    Interpolation-based estimate of the discriminator's gradient,
    used to penalize the derivative rather than explicitly constrain
    the Lipschitz constant.
    '''
    batch_size = config.batch_size
    LAMBDA = config.lambda_W
    n_hidden = config.critic_hidden_size
    alpha = tf.random_uniform([batch_size, 1], 0., 1.)
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)  # could do more if not fixed batch_size
    disc_interpolates = Discriminator(interpolates, batch_size, n_hidden=n_hidden, config=config, reuse=True)[1]  # logits
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)
    grad_cost = LAMBDA * gradient_penalty
    return grad_cost, slopes

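A hedged sketch of wiring the penalty into a WGAN-GP critic objective; `d_loss_real` and `d_loss_fake` stand for the mean critic logits on real and generated batches (assumed names, not part of the original):

grad_cost, slopes = Grad_Penalty(real_data, fake_data, Discriminator, config)
d_loss = d_loss_fake - d_loss_real + grad_cost  # critic minimizes this
g_loss = -d_loss_fake                           # generator objective
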
def _get_opt(self):
    # build the self.opt_op for training
    self.set_train_var()
    tvars = self.var_list
    self.print_trainable()
    with tf.name_scope("Optimizer"):
        opt = self._get_optx()
        grads = tf.gradients(self.loss + self.l2loss, tvars)
        grads = list(zip(grads, tvars))
        # Op to update all variables according to their gradient
        self.opt_op = opt.apply_gradients(grads_and_vars=grads, global_step=self.global_step)
        if self.flags.visualize and "grad" in self.flags.visualize:
            for grad, var in grads:
                tf.summary.histogram(var.name + '/gradient', grad, collections=[tf.GraphKeys.GRADIENTS])

def pwlin_grid(r_, rvar_, theta_, dtheta=.75):
    """piecewise linear with noise-adaptive grid spacing.
    returns xhat, dxdr
    where
        q = r/dtheta/sqrt(rvar)
        xhat = r * interp(q, theta)
    all but the last dimension of theta must broadcast to r_
    e.g. r.shape = (500,1000) is compatible with theta.shape = (500,1,7)
    """
    ntheta = int(theta_.get_shape()[-1])
    scale_ = dtheta / tf.sqrt(rvar_)
    ars_ = tf.clip_by_value(tf.expand_dims(tf.abs(r_) * scale_, -1), 0.0, ntheta - 1.0)
    centers_ = tf.constant(np.arange(ntheta), dtype=tf.float32)
    outer_distance_ = tf.maximum(0., 1.0 - tf.abs(ars_ - centers_))  # new dimension measures distance to the closest bin center(s)
    gain_ = tf.reduce_sum(theta_ * outer_distance_, axis=-1)  # apply the (learnable) gain
    xhat_ = gain_ * r_
    dxdr_ = tf.gradients(xhat_, r_)[0]
    return (xhat_, dxdr_)

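A small sketch exercising the shapes from the docstring; the grid of gains `theta_` below is a hypothetical learnable variable:

import numpy as np
import tensorflow as tf

r_ = tf.constant(np.random.randn(500, 1000).astype(np.float32))
rvar_ = tf.ones_like(r_)
theta_ = tf.Variable(np.ones((500, 1, 7), dtype=np.float32))
xhat_, dxdr_ = pwlin_grid(r_, rvar_, theta_)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    xhat, dxdr = sess.run([xhat_, dxdr_])  # dxdr is the tf.gradients output
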
def attack_single_step(self, x, eta, y):
    """
    Given the original image and the perturbation computed so far, computes
    a new perturbation.

    :param x: A tensor with the original input.
    :param eta: A tensor the same shape as x that holds the perturbation.
    :param y: A tensor with the target labels or ground-truth labels.
    """
    import tensorflow as tf
    from cleverhans.utils_tf import model_loss, clip_eta

    adv_x = x + eta
    preds = self.model.get_probs(adv_x)
    loss = model_loss(y, preds)
    if self.targeted:
        loss = -loss
    grad, = tf.gradients(loss, adv_x)
    scaled_signed_grad = self.eps_iter * tf.sign(grad)
    adv_x = adv_x + scaled_signed_grad
    if self.clip_min is not None and self.clip_max is not None:
        adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
    eta = adv_x - x
    eta = clip_eta(eta, self.ord, self.eps)
    return x, eta

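For context, the attack's `generate` method iterates this step roughly as follows (a simplified sketch; `self.nb_iter` is the attack's iteration count):

eta = tf.zeros_like(x)
for i in range(self.nb_iter):
    x, eta = self.attack_single_step(x, eta, y)
adv_x = x + eta
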
def jacobian_graph(predictions, x, nb_classes):
    """
    Create the Jacobian graph to be run later in a TF session
    :param predictions: the model's symbolic output (linear output,
                        pre-softmax)
    :param x: the input placeholder
    :param nb_classes: the number of classes the model has
    :return: a list of TF gradients, one per class
    """
    # This function will return a list of TF gradients
    list_derivatives = []

    # Define the TF graph elements to compute our derivatives for each class
    for class_ind in xrange(nb_classes):
        derivatives, = tf.gradients(predictions[:, class_ind], x)
        list_derivatives.append(derivatives)

    return list_derivatives

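A minimal sketch of evaluating the resulting Jacobian, assuming a `logits` tensor built on a placeholder `x` and a matching `x_batch` array (all hypothetical names):

grads = jacobian_graph(logits, x, nb_classes=10)
jacobian = tf.stack(grads)  # shape: (nb_classes,) + x.shape
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    jac_val = sess.run(jacobian, feed_dict={x: x_batch})
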
def test_fgm_gradient_max():
    input_dim = 2
    num_classes = 3
    batch_size = 4
    rng = np.random.RandomState([2017, 8, 23])
    x = tf.placeholder(tf.float32, [batch_size, input_dim])
    weights = tf.placeholder(tf.float32, [input_dim, num_classes])
    logits = tf.matmul(x, weights)
    probs = tf.nn.softmax(logits)
    adv_x = fgm(x, probs)
    random_example = rng.randint(batch_size)
    random_feature = rng.randint(input_dim)
    output = tf.slice(adv_x, [random_example, random_feature], [1, 1])
    dx, = tf.gradients(output, x)
    # The following line catches GitHub issue #243
    assert dx is not None
    sess = tf.Session()
    dx = sess.run(dx, feed_dict=random_feed_dict(rng, [x, weights]))
    ground_truth = np.zeros((batch_size, input_dim))
    ground_truth[random_example, random_feature] = 1.
    assert np.allclose(dx, ground_truth), (dx, ground_truth)

def test_generate_np_caches_graph_computation_for_eps_clip_or_xi(self):
    import tensorflow as tf

    x_val = np.random.rand(1, 2)
    x_val = np.array(x_val, dtype=np.float32)

    self.attack.generate_np(x_val, eps=.3, num_iterations=10,
                            clip_max=-5.0, clip_min=-5.0,
                            xi=1e-6)

    old_grads = tf.gradients

    def fn(*x, **y):
        raise RuntimeError()

    # Monkey-patch tf.gradients so that any new graph construction fails
    # loudly; the second call must therefore hit the cached graph.
    tf.gradients = fn

    self.attack.generate_np(x_val, eps=.2, num_iterations=10,
                            clip_max=-4.0, clip_min=-4.0,
                            xi=1e-5)

    tf.gradients = old_grads

def test_gradient(self):
    x_var = tf.Variable(tf.zeros([3], dtype='float64'), name='x')
    shape = loom.TypeShape('float64', (3,))
    ops = {'add': BinaryLoomOp(shape, tf.add),
           'mul': BinaryLoomOp(shape, tf.multiply)}
    the_loom = loom.Loom(named_tensors={'x': x_var}, named_ops=ops)
    output_tensor = the_loom.output_tensor(shape)
    output = tf.reduce_sum(output_tensor)
    gradient = tf.gradients(output, [x_var])[0]
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        weaver = the_loom.make_weaver()
        m = weaver(np.array([1, 2, 3], dtype='float64'))
        b = weaver(np.array([47, 9, -1], dtype='float64'))
        mx = weaver.mul(m, weaver.x)
        mx_plus_b = weaver.add(mx, b)
        result = gradient.eval(feed_dict=weaver.build_feed_dict([mx_plus_b]))
        # d(sum(m*x + b))/dx = m = [1, 2, 3]
        self.assertTrue((result == np.array(
            [1.0, 2.0, 3.0], dtype='float64')).all())

def test_gradient_with_direct_feed_dict(self):
    x_var = tf.Variable(tf.zeros([3], dtype='float64'), name='x')
    shape = loom.TypeShape('float64', (3,))
    ops = {'add': BinaryLoomOp(shape, tf.add),
           'mul': BinaryLoomOp(shape, tf.multiply)}
    the_loom = loom.Loom(named_tensors={'x': x_var}, named_ops=ops,
                         direct_feed_dict=True)
    output_tensor = the_loom.output_tensor(shape)
    output = tf.reduce_sum(output_tensor)
    gradient = tf.gradients(output, [x_var])[0]
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        weaver = the_loom.make_weaver()
        m = weaver(np.array([1, 2, 3], dtype='float64'))
        b = weaver(np.array([47, 9, -1], dtype='float64'))
        mx = weaver.mul(m, weaver.x)
        mx_plus_b = weaver.add(mx, b)
        result = gradient.eval(feed_dict=weaver.build_feed_dict([mx_plus_b]))
        self.assertTrue((result == np.array(
            [1.0, 2.0, 3.0], dtype='float64')).all())

def __init__(self, sess, state_size, action_size,
             batch_size, tau, learning_rate):
    """Init critic network."""
    self.sess = sess
    self.batch_size = batch_size
    self.tau = tau
    self.learning_rate = learning_rate
    self.action_size = action_size

    K.set_session(sess)

    self.model, self.action, self.state = \
        self.create_critic_network(state_size, action_size)
    self.target_model, self.target_action, self.target_state = \
        self.create_critic_network(state_size, action_size)
    # dQ/da: gradient of the critic's output w.r.t. the action input,
    # used later for the DDPG actor update.
    self.action_grads = tf.gradients(self.model.output, self.action)
    self.sess.run(tf.global_variables_initializer())

def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE):
    self.sess = sess
    self.BATCH_SIZE = BATCH_SIZE
    self.TAU = TAU
    self.LEARNING_RATE = LEARNING_RATE

    K.set_session(sess)

    # Now create the model
    self.model, self.weights, self.state = self.create_actor_network(state_size, action_size)
    self.target_model, self.target_weights, self.target_state = self.create_actor_network(state_size, action_size)
    self.action_gradient = tf.placeholder(tf.float32, [None, action_size])
    # Chain rule: the parameter gradient is -dQ/da * da/dtheta (negated so
    # that gradient descent performs ascent on Q).
    self.params_grad = tf.gradients(self.model.output, self.weights, -self.action_gradient)
    grads = list(zip(self.params_grad, self.weights))
    self.optimize = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(grads)
    self.sess.run(tf.global_variables_initializer())

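A hedged sketch of how the critic and actor pieces above are typically combined in a DDPG update, assuming `critic` and `actor` are instances of the two classes sharing one session `sess`, and `states` is a batch of observations (all assumptions):

a_for_grad = actor.model.predict(states)
dq_da = sess.run(critic.action_grads,
                 feed_dict={critic.state: states, critic.action: a_for_grad})[0]
sess.run(actor.optimize,
         feed_dict={actor.state: states, actor.action_gradient: dq_da})
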
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    ''' Adam optimizer '''
    updates = []
    if type(cost_or_grads) is not list:
        grads = tf.gradients(cost_or_grads, params)
    else:
        grads = cost_or_grads
    t = tf.Variable(1., 'adam_t')
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            # First-moment estimate with bias correction.
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g
            v_hat = v_t / (1. - tf.pow(mom1, t))
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        # Second-moment estimate with bias correction.
        mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2, t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)

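A minimal usage sketch, assuming a scalar `loss` tensor and an open session `sess` (both assumptions):

params = tf.trainable_variables()
train_op = adam_updates(params, loss, lr=3e-4)
sess.run(tf.global_variables_initializer())  # also initializes the Adam slots created above
sess.run(train_op)                           # one Adam step
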
def get_update_op(self, loss, opts, global_step=None, max_gradient_norm=None, freeze_variables=None):
    if loss is None:
        return None

    freeze_variables = freeze_variables or []

    # compute gradients only for variables that are not frozen
    frozen_parameters = [var.name for var in tf.trainable_variables()
                         if any(re.match(var_, var.name) for var_ in freeze_variables)]
    params = [var for var in tf.trainable_variables() if var.name not in frozen_parameters]
    self.params = params

    gradients = tf.gradients(loss, params)
    if max_gradient_norm:
        gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)

    update_ops = []
    for opt in opts:
        with tf.variable_scope('gradients' if self.name is None else 'gradients_{}'.format(self.name)):
            update_op = opt.apply_gradients(list(zip(gradients, params)), global_step=global_step)
            update_ops.append(update_op)

    return update_ops

def gradient_penalty(self):
    config = self.config
    gan = self.gan
    gradient_penalty = config.gradient_penalty
    if hasattr(gan.inputs, 'gradient_penalty_label'):
        x = gan.inputs.gradient_penalty_label
    else:
        x = gan.inputs.x
    generator = self.generator or gan.generator
    g = generator.sample
    discriminator = self.discriminator or gan.discriminator

    # Sample one interpolation coefficient per example in the batch.
    shape = [1 for t in g.get_shape()]
    shape[0] = gan.batch_size()
    uniform_noise = tf.random_uniform(shape=shape, minval=0., maxval=1.)
    print("[gradient penalty] applying x:", x, "g:", g, "noise:", uniform_noise)
    interpolates = x + uniform_noise * (g - x)
    reused_d = discriminator.reuse(interpolates)
    gradients = tf.gradients(reused_d, [interpolates])[0]
    penalty = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
    penalty = tf.reduce_mean(tf.square(penalty - 1.))
    return float(gradient_penalty) * penalty

def _create_optimizer(self):
    print('Create optimizer... ')
    with tf.variable_scope('training'):
        self.global_step = tf.Variable(
            0, dtype=tf.int32, trainable=False, name='global_step')

        if not self.fw_only:
            self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
            trainable_vars = tf.trainable_variables()
            self.gradient_norms = []
            self.train_ops = []
            start = time.time()
            for bucket_id in range(len(config.BUCKETS)):
                clipped_grads, norm = tf.clip_by_global_norm(
                    tf.gradients(self.losses[bucket_id], trainable_vars),
                    config.MAX_GRAD_NORM)
                self.gradient_norms.append(norm)
                self.train_ops.append(self.optimizer.apply_gradients(
                    zip(clipped_grads, trainable_vars),
                    global_step=self.global_step))
                print('Creating opt for bucket {:d} took {:.2f} seconds.'.format(
                    bucket_id, time.time() - start))
                start = time.time()

def update_weights(self, f):
    """
    Gradient-based update of current Critic parameters. Also return the
    action gradients for the Actor update later. This is the dQ/da in the
    paper, and Q is the current Q network, not the target Q network.
    """
    feed = {
        self.obs_t_BO: f['obs_t_BO'],
        self.act_t_BA: f['act_t_BA'],
        self.rew_t_B: f['rew_t_B'],
        self.obs_tp1_BO: f['obs_tp1_BO'],
        self.done_mask_B: f['done_mask_B']
    }
    action_grads_BA, _, l2_error = self.sess.run(
        [self.act_grads_BA, self.optimize_c, self.l2_error], feed)
    # We assume that the only item in the list has what we want.
    assert len(action_grads_BA) == 1
    return action_grads_BA[0], l2_error

def _flatgrad(self, loss, var_list):
    """ A TensorFlow version of John Schulman's `flatgrad` function. It
    computes the gradients but does NOT apply them (for now).

    This is only called during the `init` of the TRPO graph, so the ops are
    built once rather than rebuilt on every iteration.

    Params:
        loss: The loss function we're optimizing, assumed to be
            scalar-valued.
        var_list: The list of variables (from `tf.trainable_variables()`) to
            take gradients with respect to. This should only be for the
            policy nets.

    Returns:
        A single flat vector with all gradients concatenated.
    """
    grads = tf.gradients(loss, var_list)
    return tf.concat([tf.reshape(g, [-1]) for g in grads], axis=0)

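A self-contained sketch of the flattening on two toy variables:

import tensorflow as tf

a = tf.Variable([1., 2.])
b = tf.Variable([[3.], [4.]])
loss = tf.reduce_sum(a) + tf.reduce_sum(tf.square(b))
grads = tf.gradients(loss, [a, b])
flat = tf.concat([tf.reshape(g, [-1]) for g in grads], axis=0)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(flat))  # [1. 1. 6. 8.]
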
def test_linear_iaf(self):
    with self.test_session(use_gpu=True) as sess:
        z = []
        vz = [0.1, -1.2, 1.0, -0.3, 1.2, 2, 10.0, -23.2]
        for i in range(len(vz)):
            z.append(np.array([[vz[i]]]))
            z[i] = tf.constant(z[i], dtype=tf.float32)
        z_0 = tf.concat(z, axis=1)
        z_1, n_log_det_ja = inv_autoregressive_flow(
            z_0, None, [0.0], linear_ar, n_iters=1)
        n_log_det_ja = tf.reshape(n_log_det_ja, [])

        # Build the Jacobian row by row and compare its log-determinant
        # against the value reported by the flow.
        grad = []
        for i in range(len(vz)):
            z_1i = z_1[0, i]
            grad.append(tf.gradients(z_1i, z_0)[0])
        jacobian = tf.concat(grad, axis=0)
        log_det_jacobian = tf.log(tf.matrix_determinant(jacobian))

        sess.run(tf.global_variables_initializer())
        test_value, true_value = sess.run([-log_det_jacobian,
                                           n_log_det_ja])
        self.assertAllClose(test_value, true_value)

def __init__(self, sess, state_dim, action_dim, scope):
    self.state_dim = state_dim
    self.action_dim = action_dim
    # create actor network
    self.state_input, self.action_output, self.net = self.create_network(state_dim, action_dim, scope)

    # create target actor network
    self.target_state_input, self.target_action_output, self.target_update, self.target_net = self.create_target_network(state_dim, action_dim, self.net, scope)

    # define training rules
    if scope != 'global/actor':
        self.q_gradient_input = tf.placeholder("float", [None, self.action_dim])
        self.parameters_gradients = tf.gradients(self.action_output, self.net, -self.q_gradient_input)
        global_vars_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global/actor')
        self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(zip(self.parameters_gradients, global_vars_actor))
    sess.run(tf.global_variables_initializer())
    #self.update_target()
    #self.load_network()

def init_optimizer(self):
    print("setting optimizer..")
    # Gradients and SGD update operation for training the model
    trainable_params = tf.trainable_variables()
    if self.optimizer.lower() == 'adadelta':
        self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.learning_rate)
    elif self.optimizer.lower() == 'adam':
        self.opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    elif self.optimizer.lower() == 'rmsprop':
        self.opt = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)
    else:
        self.opt = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)

    # Compute gradients of loss w.r.t. all trainable variables
    gradients = tf.gradients(self.loss, trainable_params)

    # Clip gradients by the given maximum gradient norm
    clip_gradients, _ = tf.clip_by_global_norm(gradients, self.max_gradient_norm)

    # Update the model
    self.updates = self.opt.apply_gradients(
        zip(clip_gradients, trainable_params), global_step=self.global_step)

def _deepfool2(model, x, epochs, eta, clip_min, clip_max, min_prob):
    y0 = tf.stop_gradient(tf.reshape(model(x), [-1])[0])
    y0 = tf.to_int32(tf.greater(y0, 0.5))

    def _cond(i, z):
        xadv = tf.clip_by_value(x + z*(1+eta), clip_min, clip_max)
        y = tf.stop_gradient(tf.reshape(model(xadv), [-1])[0])
        y = tf.to_int32(tf.greater(y, 0.5))
        # Keep iterating while the predicted label has not flipped.
        return tf.logical_and(tf.less(i, epochs), tf.equal(y0, y))

    def _body(i, z):
        xadv = tf.clip_by_value(x + z*(1+eta), clip_min, clip_max)
        y = tf.reshape(model(xadv), [-1])[0]
        g = tf.gradients(y, xadv)[0]
        # Step toward the decision boundary of the binary classifier.
        dx = - y * g / tf.norm(g)
        return i+1, z+dx

    _, noise = tf.while_loop(_cond, _body, [0, tf.zeros_like(x)],
                             name='_deepfool2_impl', back_prop=False)
    return noise

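A hedged sketch of calling it, assuming `model` maps images in [0, 1] to P(class = 1) for a binary classifier (the names below are assumptions):

noise = _deepfool2(model, x, epochs=3, eta=0.01,
                   clip_min=0., clip_max=1., min_prob=0.)
xadv = tf.clip_by_value(x + noise * (1 + 0.01), 0., 1.)
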
def __init__(self, x_op, y_op, sess, remove_bias=False):
    # Save parameters
    self.x_op = x_op
    self.y_op = y_op
    self.sess = sess
    self.remove_bias = remove_bias

    # Get dimensions and data types
    self.shape0 = x_op.get_shape()
    self.shape1 = y_op.get_shape()
    self.dtype0 = x_op.dtype
    self.dtype1 = y_op.dtype

    # Create the ops for the gradient. If the linear operator is y = F(x),
    # then z = y'*F(x) and dz/dx = F'(y), so backpropagating through z
    # applies the adjoint operator to whatever is fed into ytr_op.
    self.ytr_op = tf.placeholder(self.dtype1, self.shape1)
    self.z_op = tf.reduce_sum(tf.multiply(tf.conj(self.ytr_op), self.y_op))
    self.zgrad_op = tf.gradients(self.z_op, self.x_op)[0]

    # Compute output at zero to subtract
    if self.remove_bias:
        xzero = np.zeros(self.shape0)
        self.y_bias = self.sess.run(self.y_op, feed_dict={self.x_op: xzero})
    else:
        self.y_bias = 0

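The same adjoint trick in isolation, as a minimal sketch: for a real linear map y = A x, backpropagating z = sum(u * y) with respect to x yields A^T u.

import numpy as np
import tensorflow as tf

A = tf.constant([[1., 2.], [3., 4.], [5., 6.]])  # F(x) = A x
x = tf.placeholder(tf.float32, [2, 1])
y = tf.matmul(A, x)
u = tf.placeholder(tf.float32, [3, 1])
z = tf.reduce_sum(u * y)     # real case, so conj() is a no-op
adj = tf.gradients(z, x)[0]  # equals A^T u
with tf.Session() as sess:
    print(sess.run(adj, {x: np.zeros((2, 1), np.float32),
                         u: np.array([[1.], [0.], [0.]], np.float32)}))
    # -> [[1.], [2.]], the first row of A
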
def _build_train_op(self):
    """Build training specific ops for the graph."""
    self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
    tf.summary.scalar('learning_rate', self.lrn_rate)

    trainable_variables = tf.trainable_variables()
    grads = tf.gradients(self.cost, trainable_variables)

    if self.hps.optimizer == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
    elif self.hps.optimizer == 'mom':
        optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)

    apply_op = optimizer.apply_gradients(
        zip(grads, trainable_variables),
        global_step=self.global_step, name='train_step')

    train_ops = [apply_op] + self._extra_train_ops
    self.train_op = tf.group(*train_ops)

# TODO(xpan): Consider batch_norm in contrib/layers/python/layers/layers.py

def _deconvolution(graph, sess, op_tensor, X, feed_dict):
    out = []
    with graph.as_default() as g:
        # get shape of tensor
        tensor_shape = op_tensor.get_shape().as_list()

        with sess.as_default() as sess:
            # creating placeholders to pass featuremaps and
            # creating gradient ops
            featuremap = [tf.placeholder(tf.int32) for i in range(config["N"])]
            reconstruct = [tf.gradients(tf.transpose(tf.transpose(op_tensor)[featuremap[i]]), X)[0]
                           for i in range(config["N"])]

            # Execute the gradient operations in batches of 'n'
            for i in range(0, tensor_shape[-1], config["N"]):
                c = 0
                for j in range(config["N"]):
                    if (i + j) < tensor_shape[-1]:
                        feed_dict[featuremap[j]] = i + j
                        c += 1
                if c > 0:
                    out.extend(sess.run(reconstruct[:c], feed_dict=feed_dict))
    return out

def create_summaries(self, verbose=2):
    """ Create summaries with `verbose` level """
    summ_collection = self.name + "_training_summaries"

    if verbose in [3]:
        # Summarize activations
        activations = tf.get_collection(tf.GraphKeys.ACTIVATIONS)
        summarize_activations(activations, summ_collection)
    if verbose in [2, 3]:
        # Summarize variable weights
        summarize_variables(self.train_vars, summ_collection)
    if verbose in [1, 2, 3]:
        # Summarize gradients
        summarize_gradients(self.grad, summ_collection)

    self.summ_op = merge_summary(tf.get_collection(summ_collection))

def classify(self, model_range, seg_range, feature_lr, classifier_lr):
    feat_opt = tf.train.AdamOptimizer(feature_lr)
    clas_opt = tf.train.AdamOptimizer(classifier_lr)
    for model in model_range:
        for seg in seg_range:
            with tf.variable_scope('classifier-{}-{}'.format(model, seg)):
                self.preds[(model, seg)] = slim.conv2d(self.feature, 500, [1, 1])
                self.clas_vars[(model, seg)] = slim.get_model_variables()[-2:]

            with tf.variable_scope('losses-{}-{}'.format(model, seg)):
                self.losses[(model, seg)] = self.loss(self.labels, self.preds[(model, seg)])

            # Split the gradient list: all but the last two entries belong to
            # the shared feature extractor, the last two to this segment's
            # classifier head.
            grad = tf.gradients(self.losses[(model, seg)], self.feat_vars + self.clas_vars[(model, seg)])
            train_op_feat = feat_opt.apply_gradients(zip(grad[:-2], self.feat_vars))
            train_op_clas = clas_opt.apply_gradients(zip(grad[-2:], self.clas_vars[(model, seg)]))
            self.train_ops[(model, seg)] = tf.group(train_op_feat, train_op_clas)
    return self.losses, self.train_ops