def _apply_dense(self, grad, weight):
learning_rate_t = tf.cast(self._lr_t, weight.dtype.base_dtype)
mu_t = tf.cast(self._mu_t, weight.dtype.base_dtype)
norm_t = tf.cast(self._norm_t, weight.dtype.base_dtype)
momentum = self.get_slot(weight, "a")
norm = self.get_slot(weight, "n")
    if momentum.get_shape().ndims == 2:
        momentum_mean = tf.reduce_mean(momentum, axis=1, keep_dims=True)
    else:
        momentum_mean = momentum
    norm_update = learning_rate_t / norm + norm
    # tf.assign needs a Variable: assign to the "n" slot, not to the cast tensor norm_t
    norm_t = tf.assign(norm, norm_update, use_locking=self._use_locking)
momentum_update = (grad / norm_t) + (mu_t * momentum_mean)
momentum_t = tf.assign(momentum, momentum_update,
use_locking=self._use_locking)
weight_update = learning_rate_t * momentum_t
weight_t = tf.assign_sub(
weight, weight_update, use_locking=self._use_locking)
return tf.group(*[weight_t, norm_t, momentum_t])
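For reference, tf.group here bundles the three assign ops into a single op that returns no value; running it runs all of them. A minimal standalone sketch of that behavior:

import tensorflow as tf

a = tf.Variable(0.0)
b = tf.Variable(0.0)
step = tf.group(tf.assign_add(a, 1.0), tf.assign_add(b, 2.0))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(step)           # runs both assigns; tf.group returns an Operation, not tensors
    print(sess.run([a, b]))  # [1.0, 2.0]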
def __init__(self, tag, x, summary_fn=tf.summary.scalar, summary_args=None, scope=None):
"""
Initializes an Average.
Arguments:
    tag: Tag for the summary.
    x: Tensor to be averaged over multiple runs.
    summary_fn: Function used for creating a summary.
    summary_args: Dict of keyword arguments passed to the summary function.
"""
with tf.variable_scope(scope or type(self).__name__):
counter = tf.Variable(name="counter", initial_value=tf.constant(0),
dtype=tf.int32, trainable=False)
running_sum = tf.Variable(name="running_sum", initial_value=tf.constant(0.),
dtype=tf.float32, trainable=False)
self._running_average = running_sum / tf.cast(counter, tf.float32)
self._summary = summary_fn(tag or x.name + '_avg', self._running_average, **(summary_args or {}))
self._update_op = tf.group(counter.assign_add(1), running_sum.assign_add(x))
self._reset_op = tf.group(counter.assign(0), running_sum.assign(0.))
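A hypothetical usage sketch for this helper (the ops are stored as private attributes in the snippet above; the loss tensor here is a stand-in scalar):

loss = tf.random_uniform([])  # stand-in for a real scalar loss
avg = Average(tag='loss_avg', x=loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
        sess.run(avg._update_op)           # counter += 1, running_sum += loss
    print(sess.run(avg._running_average))  # mean over the 100 runs
    sess.run(avg._reset_op)                # start a new averaging window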
def build_model(self):
Gen=GeneratorTypes[self.gan_type]
config=self.config
self.gen=Gen(config.batch_size,config.gen_hidden_size,config.gen_z_dim)
with tf.variable_scope('Disc') as scope:
self.D1 = Discriminator(self.data.X, config.disc_hidden_size)
scope.reuse_variables()
self.D2 = Discriminator(self.gen.X, config.disc_hidden_size)
d_var = tf.contrib.framework.get_variables(scope)
d_loss_real=tf.reduce_mean( sxe(self.D1,1) )
d_loss_fake=tf.reduce_mean( sxe(self.D2,0) )
self.loss_d = d_loss_real + d_loss_fake
self.loss_g = tf.reduce_mean( sxe(self.D2,1) )
optimizer=tf.train.AdamOptimizer
g_optimizer=optimizer(self.config.lr_gen)
d_optimizer=optimizer(self.config.lr_disc)
self.opt_d = d_optimizer.minimize(self.loss_d,var_list= d_var)
self.opt_g = g_optimizer.minimize(self.loss_g,var_list= self.gen.tr_var,
global_step=self.gen.step)
with tf.control_dependencies([self.inc_step]):
self.train_op=tf.group(self.opt_d,self.opt_g)
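sxe is not defined in this snippet; it is presumably sigmoid cross-entropy against a constant 0/1 label (1 = real, 0 = fake). A plausible definition, stated as an assumption:

def sxe(logits, label):
    # sigmoid cross-entropy against a constant label, broadcast to the logits' shape
    return tf.nn.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=tf.ones_like(logits) * label)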
def build_train_op(self):
config=self.config
self.g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
.minimize(self.g_loss, var_list=self.g_vars)
self.d_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
.minimize(self.d_loss, var_list=self.d_vars)
self.d_label_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
.minimize(self.d_labelLossReal, var_list=self.dl_vars)
self.d_gen_label_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
.minimize(self.g_lossLabels_GLabeler, var_list=self.dl_gen_vars)
self.d_on_z_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
.minimize(self.g_loss_on_z + self.rec_loss_coeff*self.real_reconstruction_loss, var_list=self.dz_vars)
self.k_t_update = tf.assign(self.k_t, self.k_t*tf.exp(-1.0/config.tau) )
self.train_op=tf.group(self.d_gen_label_optim,self.d_label_optim,self.d_optim,self.g_optim,self.d_on_z_optim)
def build_optim(self, loss):
global_step = self.global_step
learn_rate = self.learn_rate
# We must calculate the mean of each gradient. Note that this is the
# synchronization point across all towers.
grads = self.average_gradients(self.tower_grads)
# Apply the gradients to adjust the shared variables.
apply_gradient_op = self.opt.apply_gradients(
grads, global_step=global_step)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
0.999, global_step)
variables_averages_op = variable_averages.apply(
tf.trainable_variables())
# Group all updates to into a single train op.
train_op = tf.group(apply_gradient_op, variables_averages_op)
# for m in self.sub_models:
# self.log.info(m.device)
# self.log.fatal('haha')
return train_op
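average_gradients is not shown here; the standard multi-tower version (as in the TensorFlow CIFAR-10 multi-GPU tutorial) averages each variable's gradient across towers. A sketch along those lines, assuming no tower produces a None gradient:

def average_gradients(tower_grads):
    # tower_grads: list (one entry per tower) of lists of (grad, var) pairs
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad_tower0, var), (grad_tower1, var), ...) for one variable
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads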
def conv(self, input, kernel, biases, k_h, k_w, c_o, s_h, s_w, padding="VALID", group=1):
'''From https://github.com/ethereon/caffe-tensorflow
'''
c_i = input.get_shape()[-1]
assert c_i%group==0
assert c_o%group==0
convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
if group==1:
conv = convolve(input, kernel)
else:
#input_groups = tf.split(3, group, input)
#kernel_groups = tf.split(3, group, kernel)
input_groups = tf.split(input, group, 3)
kernel_groups = tf.split(kernel, group, 3)
output_groups = [convolve(i, k) for i,k in zip(input_groups, kernel_groups)]
#conv = tf.concat(3, output_groups)
conv = tf.concat(output_groups, 3)
return tf.reshape(tf.nn.bias_add(conv, biases), [-1]+conv.get_shape().as_list()[1:])
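An illustrative call from inside the class, using AlexNet conv2-style shapes; note the kernel's third dimension is c_i/group, since both the input and the kernels are split along the channel axis:

x = tf.placeholder(tf.float32, [None, 27, 27, 96])
kernel = tf.Variable(tf.truncated_normal([5, 5, 48, 256], stddev=0.01))  # c_i/group = 48
biases = tf.Variable(tf.zeros([256]))
out = self.conv(x, kernel, biases, k_h=5, k_w=5, c_o=256, s_h=1, s_w=1,
                padding="SAME", group=2)  # two convolution groups, concatenated to 256 channels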
test_conv1.py (project: tensorflow-action-conditional-video-prediction, author: williamd4112)
def main(args):
with tf.Graph().as_default() as graph:
# Create dataset
logging.info('Create data flow from %s' % args.data)
caffe_dataset = CaffeDataset(dir=args.data, num_act=args.num_act, mean_path=args.mean)
# Config session
config = get_config(args)
x = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 12])
op = load_caffe_model(x, args.load)
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
# Start session
with tf.Session(config=config) as sess:
sess.run(init)
i = 0
for s, a in caffe_dataset(5):
pred_data = sess.run([op], feed_dict={x: [s]})[0]
print(pred_data.shape)
np.save('tf-%03d.npy' % i, pred_data)
i += 1
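get_config is not shown; presumably it builds a tf.ConfigProto from the parsed flags. A minimal assumed version:

def get_config(args):
    # hypothetical: let the session grow GPU memory on demand instead of grabbing it all
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return config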
def evaluate():
"""Eval ocr for a number of steps."""
with tf.Graph().as_default() as g:
images, labels, seq_lengths = ocr.inputs()
logits, timesteps = ocr.inference(images, FLAGS.eval_batch_size, train=True)
ler = ocr.create_label_error_rate(logits, labels, timesteps)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
config = tf.ConfigProto(
device_count={'GPU': 0}
)
sess = tf.Session(config=config)
sess.run(init_op)
saver = tf.train.Saver()
summary_op = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)
while True:
eval_once(saver, summary_writer, ler, summary_op)
if FLAGS.run_once:
break
# print("Waiting for next evaluation for " + str(FLAGS.eval_interval_secs) + " sec")
time.sleep(FLAGS.eval_interval_secs)
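eval_once is defined elsewhere; a typical shape for it, sketched under the assumption that it restores the newest checkpoint (from a hypothetical FLAGS.checkpoint_dir) and runs one evaluation pass:

def eval_once(saver, summary_writer, ler, summary_op):
    # hypothetical sketch: restore the latest checkpoint and evaluate the label error rate
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return
        saver.restore(sess, ckpt.model_checkpoint_path)
        label_error_rate, summary = sess.run([ler, summary_op])
        print('label error rate = %.4f' % label_error_rate)
        summary_writer.add_summary(summary)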
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
''' Adam optimizer '''
updates = []
if type(cost_or_grads) is not list:
grads = tf.gradients(cost_or_grads, params)
else:
grads = cost_or_grads
    # name must be passed as a keyword: tf.Variable's second positional argument is trainable
    t = tf.Variable(1., name='adam_t')
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), name=p.op.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), name=p.op.name + '_adam_v')
v_t = mom1 * v + (1. - mom1) * g
v_hat = v_t / (1. - tf.pow(mom1, t))
updates.append(v.assign(v_t))
else:
v_hat = g
mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
mg_hat = mg_t / (1. - tf.pow(mom2, t))
g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
p_t = p - lr * g_t
updates.append(mg.assign(mg_t))
updates.append(p.assign(p_t))
updates.append(t.assign_add(1))
return tf.group(*updates)
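Typical usage, assuming a scalar loss has already been built:

params = tf.trainable_variables()
train_op = adam_updates(params, loss, lr=3e-4)  # `loss` is the model's scalar loss (illustrative)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)  # one step: updates every param, both moment buffers, and the counter t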
def _apply_dense(self, grad, var):
lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
if var.dtype.base_dtype == tf.float16:
eps = 1e-7 # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
else:
eps = 1e-8
v = self.get_slot(var, "v")
v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
m = self.get_slot(var, "m")
m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
g_t = v_t / m_t
var_update = tf.assign_sub(var, lr_t * g_t)
return tf.group(*[var_update, m_t, v_t])
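This is the AdaMax rule from the Adam paper (Kingma & Ba, 2015): the slot named "m" tracks an exponentially weighted infinity norm via tf.maximum, so no bias correction of the denominator is needed. The slots would be created in _create_slots, roughly:

def _create_slots(self, var_list):
    # one first-moment slot ("v") and one infinity-norm slot ("m") per variable
    for v in var_list:
        self._zeros_slot(v, "v", self._name)
        self._zeros_slot(v, "m", self._name)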
def __init__(self, inputs, outputs, updates=[]):
assert type(inputs) in {list, tuple}, 'Input to a TensorFlow backend function should be a list or tuple.'
assert type(outputs) in {list, tuple}, 'Output to a TensorFlow backend function should be a list or tuple.'
assert type(updates) in {list, tuple}, 'Updates in a TensorFlow backend function should be a list or tuple.'
self.inputs = list(inputs)
self.outputs = list(outputs)
with tf.control_dependencies(self.outputs):
updates_ops = []
for update in updates:
if type(update) is tuple:
p, new_p = update
updates_ops.append(tf.assign(p, new_p))
else:
# assumed already an op
updates_ops.append(update)
self.updates_op = tf.group(*updates_ops)
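A matching __call__ for this Keras-style backend function would feed the inputs and run the outputs together with the grouped update op; a sketch under that assumption:

def __call__(self, inputs):
    assert type(inputs) in {list, tuple}
    feed_dict = dict(zip(self.inputs, inputs))
    session = tf.get_default_session()
    results = session.run(self.outputs + [self.updates_op], feed_dict=feed_dict)
    return results[:len(self.outputs)]  # drop the (None) result of the group op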
def predict_sym(self, xs):
return L.get_output(self.l_out, xs)
# def fit(self, xs, ys):
# if self._normalize_inputs:
# # recompute normalizing constants for inputs
# new_mean = np.mean(xs, axis=0, keepdims=True)
# new_std = np.std(xs, axis=0, keepdims=True) + 1e-8
# tf.get_default_session().run(tf.group(
# tf.assign(self._x_mean_var, new_mean),
# tf.assign(self._x_std_var, new_std),
# ))
# inputs = [xs, ys]
# loss_before = self._optimizer.loss(inputs)
# if self._name:
# prefix = self._name + "_"
# else:
# prefix = ""
# logger.record_tabular(prefix + 'LossBefore', loss_before)
# self._optimizer.optimize(inputs)
# loss_after = self._optimizer.loss(inputs)
# logger.record_tabular(prefix + 'LossAfter', loss_after)
# logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
with tf.name_scope(name, self._name) as name:
update_op = self._opt.apply_gradients(
grads_and_vars, global_step=global_step)
add_noise_ops = []
with tf.control_dependencies([update_op]):
for grad, var in grads_and_vars:
if grad is None:
continue
with tf.name_scope("sgld_noise_" + var.op.name):
if isinstance(grad, tf.Tensor):
add_noise_ops.append(self._noise_dense(var))
else:
add_noise_ops.append(self._noise_sparse(grad, var))
## running combined op
return tf.group(*([update_op] + add_noise_ops), name=name)
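_noise_dense is not shown; for SGLD it injects Gaussian noise whose variance scales with the learning rate after each gradient step. A sketch, assuming the optimizer keeps its learning rate in self._learning_rate:

def _noise_dense(self, var):
    # hypothetical: add N(0, learning_rate) noise to the variable after the gradient step
    noise = tf.random_normal(tf.shape(var), stddev=tf.sqrt(self._learning_rate))
    return var.assign_add(noise, use_locking=self._use_locking)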
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
with tf.name_scope(name, self._name) as name:
update_op = self._opt.apply_gradients(
grads_and_vars, global_step=global_step)
add_noise_ops = []
with tf.control_dependencies([update_op]):
for grad, var in grads_and_vars:
if grad is None:
continue
with tf.name_scope("psgld_noise_" + var.op.name):
if isinstance(grad, tf.Tensor):
add_noise_ops.append(self._noise_dense(var))
else:
add_noise_ops.append(self._noise_sparse(grad, var))
## running combined op
return tf.group(*([update_op] + add_noise_ops), name=name)
def copy_all_vars(from_namespace, to_namespace, affine_coefficient=1.0):
assert affine_coefficient >= 0.0 and affine_coefficient <= 1.0
copy_ops = []
with tf.variable_scope("", reuse=True): # for grabbing the targets by full namespace
for src_var in tf.all_variables():
# ignore any variable not in src namespace
if not src_var.name.startswith(from_namespace):
continue
# fetch reference to target variable with the same name as the src variable
assert src_var.name.endswith(":0")
target_var_name = src_var.name.replace(from_namespace, to_namespace).replace(":0", "")
target_var = tf.get_variable(target_var_name, src_var.get_shape())
# create a copy op to clobber target with src
# target = alpha * src + (1.0-alpha) * target
copy_ops.append(target_var.assign_sub(affine_coefficient * (target_var - src_var)))
single_copy_op = tf.group(*copy_ops)
return single_copy_op
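Usage for DQN-style target networks, with illustrative namespace names: a hard copy at initialization, then slow tracking during training:

hard_copy = copy_all_vars("online", "target", affine_coefficient=1.0)   # target <- online
soft_copy = copy_all_vars("online", "target", affine_coefficient=0.01)  # slow tracking
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(hard_copy)   # initialize target from online
    # ... then once per training step:
    sess.run(soft_copy)   # target <- 0.01 * online + 0.99 * target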
def _build_train_op(self):
"""Build training specific ops for the graph."""
self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
tf.summary.scalar('learning_rate', self.lrn_rate)
trainable_variables = tf.trainable_variables()
grads = tf.gradients(self.cost, trainable_variables)
if self.hps.optimizer == 'sgd':
optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
elif self.hps.optimizer == 'mom':
optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
apply_op = optimizer.apply_gradients(
zip(grads, trainable_variables),
global_step=self.global_step, name='train_step')
train_ops = [apply_op] + self._extra_train_ops
self.train_op = tf.group(*train_ops)
# TODO(xpan): Consider batch_norm in contrib/layers/python/layers/layers.py
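The TODO refers to batch normalization: batch-norm moving-average updates must run with the train step, which is what _extra_train_ops collects here. With tf.contrib.layers-style batch norm, the roughly equivalent pattern uses the UPDATE_OPS collection:

# hypothetical alternative to self._extra_train_ops, via the UPDATE_OPS collection
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.apply_gradients(
        zip(grads, trainable_variables),
        global_step=self.global_step, name='train_step')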
def classify(self, model_range, seg_range, feature_lr, classifier_lr):
feat_opt = tf.train.AdamOptimizer(feature_lr)
clas_opt = tf.train.AdamOptimizer(classifier_lr)
for model in model_range:
for seg in seg_range:
with tf.variable_scope('classifier-{}-{}'.format(model, seg)):
self.preds[(model, seg)] = slim.conv2d(self.feature, 500, [1, 1])
self.clas_vars[(model, seg)] = slim.get_model_variables()[-2:]
with tf.variable_scope('losses-{}-{}'.format(model, seg)):
self.losses[(model, seg)] = self.loss(self.labels, self.preds[(model, seg)])
grad = tf.gradients(self.losses[(model, seg)], self.feat_vars + self.clas_vars[(model, seg)])
train_op_feat = feat_opt.apply_gradients(zip(grad[:-2], self.feat_vars))
train_op_clas = clas_opt.apply_gradients(zip(grad[-2:], self.clas_vars[(model, seg)]))
self.train_ops[(model, seg)] = tf.group(train_op_feat, train_op_clas)
return self.losses, self.train_ops
def time_tensorflow_run(session, target, info_string):
#num_steps_burn_in = 10
num_steps_burn_in = 0
total_duration = 0.0
total_duration_squared = 0.0
if not isinstance(target, list):
target = [target]
target_op = tf.group(*target)
for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time()
_ = session.run(target_op)
duration = time.time() - start_time
if i >= num_steps_burn_in:
if not i % 10:
print ('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration
total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr)
print ('fake %s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd))
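A typical call, with last_op and grad_op standing in for the final ops of whatever graph is being benchmarked; passing a list times all of the ops as one tf.group:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    time_tensorflow_run(sess, last_op, 'Forward')                        # single op
    time_tensorflow_run(sess, [last_op, grad_op], 'Forward-backward')    # list -> tf.group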
def time_tensorflow_run(session, target, info_string):
num_steps_burn_in = 10
total_duration = 0.0
total_duration_squared = 0.0
if not isinstance(target, list):
target = [target]
target_op = tf.group(*target)
for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time()
_ = session.run(target_op)
duration = time.time() - start_time
if i >= num_steps_burn_in:
if not i % 10:
print ('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration
total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr)
print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def time_tensorflow_run(session, target, info_string):
num_steps_burn_in = 10
#num_steps_burn_in = 0
total_duration = 0.0
total_duration_squared = 0.0
if not isinstance(target, list):
target = [target]
target_op = tf.group(*target)
for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time()
_ = session.run(target_op)
duration = time.time() - start_time
if i >= num_steps_burn_in:
#if not i % 10:
if not i % 1:
print ('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration
total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr)
print ('fake %s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def time_tensorflow_run(session, target, info_string):
num_steps_burn_in = 10
#num_steps_burn_in = 0
total_duration = 0.0
total_duration_squared = 0.0
if not isinstance(target, list):
target = [target]
target_op = tf.group(*target)
for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time()
session.run(target_op)
duration = time.time() - start_time
if i >= num_steps_burn_in:
#saver.save(session, './models/my-model', global_step=i)
#if not i % 10:
if not i % 1:
print ('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration
total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr)
print ('fake %s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def __init__(self, config, model):
assert isinstance(model, Model)
self.config = config
self.model = model
self.opt = tf.train.AdagradOptimizer(config.init_lr)
self.loss = model.get_loss()
self.var_list = model.get_var_list()
self.global_step = model.get_global_step()
self.ema_op = model.ema_op
self.summary = model.summary
self.grads = self.opt.compute_gradients(self.loss, var_list=self.var_list)
opt_op = self.opt.apply_gradients(self.grads, global_step=self.global_step)
# Define train op
with tf.control_dependencies([opt_op]):
self.train_op = tf.group(self.ema_op)
def update_hyper_param(self):
assign_hyper_ops = []
self._mu = tf.identity(tf.cond(
self._do_tune, lambda: self.get_mu_tensor(),
lambda: self._mu_var))
with tf.control_dependencies([self._mu]):
self._lr = tf.identity(tf.cond(
self._do_tune, lambda: self.get_lr_tensor(),
lambda: self._lr_var))
with tf.control_dependencies([self._mu, self._lr]):
if self._use_unsmoothed_lr_mu:
assign_hyper_ops.append(tf.assign(self._mu_var, self._mu) )
assign_hyper_ops.append(tf.assign(self._lr_var, self._lr) )
else:
self._mu = self._beta * self._mu_var + (1 - self._beta) * self._mu
self._lr = self._beta * self._lr_var + (1 - self._beta) * self._lr
with tf.control_dependencies([self._mu, self._lr] ):
assign_hyper_ops.append(tf.assign(self._mu_var, self._mu) )
assign_hyper_ops.append(tf.assign(self._lr_var, self._lr) )
assign_hyper_op = tf.group(*assign_hyper_ops)
return assign_hyper_op
def update_target_network(source_network, target_network, update_rate):
    target_network_update = []
    # debug helper (disabled): print variable names/shapes of both networks
    # for v in source_network.variables():
    #     print("source: " + v.name + " : " + str(v.get_shape()))
    # for v in target_network.variables():
    #     print("target: " + v.name + " : " + str(v.get_shape()))
for v_source, v_target in zip(source_network.variables(), target_network.variables()):
# this is equivalent to target = (1-alpha) * target + alpha * source
update_op = v_target.assign_sub(update_rate * (v_target - v_source))
target_network_update.append(update_op)
return tf.group(*target_network_update)
# def concat_nn_input(self, input1, input2):
# return tf.concat(1, [input1, input2])
# def add_pow_values(self, values):
# return self.concat_nn_input(values, 0.01 * tf.pow(values, [2 for i in range(self.action_size)]))