def __init__(self, sess, checkpoint_dir, log_dir, training_paths, testing_paths, roi, im_size, nclass,
             batch_size=1, layers=3, features_root=32, conv_size=3, dropout=0.5, testing_gt_available=True,
             loss_type='cross_entropy', class_weights=None):
    """Store configuration, build the model graph, and create a saver.

    The saver covers the trainable variables plus everything registered in
    the 'bn_collections' graph collection (presumably batch-norm statistics
    populated by build_model -- verify there).

    Args:
        sess: TensorFlow session used for training / inference.
        checkpoint_dir: directory where checkpoints are written.
        log_dir: directory for summary logs.
        training_paths: locations of the training data.
        testing_paths: locations of the testing data.
        roi: (roi_order, roi_name) tuple.
        im_size: input image size.
        nclass: number of output classes.
        batch_size: examples per step (default 1).
        layers: network depth.
        features_root: feature maps at the first layer.
        conv_size: convolution kernel size.
        dropout: keep/drop rate used by the model.
        testing_gt_available: whether ground truth exists for the test set.
        loss_type: loss selector, e.g. 'cross_entropy'.
        class_weights: optional per-class loss weights.
    """
    self.sess = sess
    self.checkpoint_dir = checkpoint_dir
    self.log_dir = log_dir
    self.training_paths = training_paths
    self.testing_paths = testing_paths
    self.testing_gt_available = testing_gt_available
    self.nclass = nclass
    self.im_size = im_size
    self.roi = roi  # (roi_order, roi_name)
    self.batch_size = batch_size
    self.layers = layers
    self.features_root = features_root
    self.conv_size = conv_size
    self.dropout = dropout
    self.loss_type = loss_type
    self.class_weights = class_weights
    # Build the graph first so the variables exist before the saver is made.
    self.build_model()
    self.saver = tf.train.Saver(tf.trainable_variables() + tf.get_collection_ref('bn_collections'))
# Python examples: collected usages of tf.trainable_variables()
def create_network(self, state_dim, action_dim, scope):
    """Build a recurrent actor network.

    Args:
        state_dim: size of each per-step state vector.
        action_dim: size of the action output.
        scope: variable-scope name; also used to collect the net's variables.

    Returns:
        (state_input placeholder, action_output tensor, list of trainable
        variables whose names contain `scope`).
    """
    with tf.variable_scope(scope, reuse=False):
        # [batch, time, state_dim]; the time dimension is dynamic.
        state_input = tf.placeholder("float", [None, None, state_dim])
        # Recurrent part.
        lstm_cell = rnn.BasicLSTMCell(LSTM_HIDDEN_UNIT)
        lstm_output, lstm_state = tf.nn.dynamic_rnn(
            cell=lstm_cell, inputs=state_input, dtype=tf.float32)
        # BUGFIX: with state_is_tuple (the default), `lstm_state` is an
        # LSTMStateTuple(c, h), and `lstm_cell.state_size` is that tuple as
        # well -- the original `tf.matmul(lstm_state, W3)` with a
        # [state_size, action_dim] weight cannot work.  Use the final hidden
        # state h, which is a [batch, LSTM_HIDDEN_UNIT] tensor.
        final_h = lstm_state.h
        W3 = tf.Variable(tf.random_uniform([LSTM_HIDDEN_UNIT, action_dim], -3e-3, 3e-3))
        b3 = tf.Variable(tf.random_uniform([action_dim], -3e-3, 3e-3))
        action_output = tf.tanh(tf.matmul(final_h, W3) + b3)
    net = [v for v in tf.trainable_variables() if scope in v.name]
    return state_input, action_output, net
def get_forward_parameters(vocab_size=4716):
    """Collect the forward model's weight/bias variables and pack each
    layer's weight matrix together with its bias (reshaped to a row).

    Returns:
        A list of four tensors, one per layer, each the concatenation
        [weights; bias_row] along axis 0.
    """
    all_vars = tf.trainable_variables()

    def pick(layer, kind):
        # First trainable variable whose name mentions both the layer and kind.
        return [v for v in all_vars if layer in v.name and kind in v.name][0]

    # (layer-name substring, width of its bias vector)
    specs = [
        ('hidden_1', FLAGS.hidden_size_1),
        ('hidden_2', FLAGS.hidden_size_2),
        ('output_1', FLAGS.hidden_size_1),
        ('output_2', vocab_size),
    ]
    packed = []
    for layer, width in specs:
        weight = pick(layer, 'weights')
        bias_row = tf.reshape(pick(layer, 'biases'), [1, width])
        packed.append(tf.concat((weight, bias_row), axis=0))
    return packed
def get_forward_parameters(vocab_size=4716):
    """Return [W; b] parameter blocks for the two hidden and two output
    layers of the forward model (bias reshaped to a single row)."""
    t_vars = tf.trainable_variables()

    def first_match(layer, kind):
        # The layers are named so exactly one variable matches each pair.
        return next(v for v in t_vars if layer in v.name and kind in v.name)

    widths = {
        'hidden_1': FLAGS.hidden_size_1,
        'hidden_2': FLAGS.hidden_size_2,
        'output_1': FLAGS.hidden_size_1,
        'output_2': vocab_size,
    }
    return [
        tf.concat((first_match(layer, 'weights'),
                   tf.reshape(first_match(layer, 'biases'), [1, widths[layer]])),
                  axis=0)
        for layer in ('hidden_1', 'hidden_2', 'output_1', 'output_2')
    ]
def build_model2(self):
    """Build the graph for training the z-encoder (Ez).

    Generates images from (z, y), re-encodes them to e_z, and penalizes
    the squared distance between e_z and the original z.  Also creates
    savers for the generator alone and for generator + encoder.
    """
    self.weights3, self.biases3 = self.get_en_z_variables()
    # training Ez
    self.fake_images = self.generate(self.z, self.y, weights=self.weights1, biases=self.biases1)
    self.e_z = self.encode_z(self.fake_images, weights=self.weights3, biases=self.biases3)
    self.loss_z = tf.reduce_mean(tf.square(tf.contrib.layers.flatten(self.e_z - self.z)))
    t_vars = tf.trainable_variables()
    self.g_vars = [var for var in t_vars if 'gen' in var.name]
    self.enz_vars = [var for var in t_vars if 'enz' in var.name]
    # BUGFIX: the original used Python-2-only `print len(...)` statements;
    # print() calls work on both Python 2 and 3.
    print(len(self.g_vars))
    print(len(self.enz_vars))
    self.saver = tf.train.Saver(self.g_vars)
    self.saver_z = tf.train.Saver(self.g_vars + self.enz_vars)
#Training the Encode_y
def build_model4(self):
    """Build the graph that encodes an image to (z, y), shifts the predicted
    class label by `extend_value`, and regenerates an image from the result.

    Also creates savers for the generator, generator + z-encoder, and
    y-encoder variable groups.
    """
    self.weights3, self.biases3 = self.get_en_z_variables()
    self.weights4, self.biases4 = self.get_en_y_variables()
    self.e_z = self.encode_z(self.images, weights=self.weights3, biases=self.biases3)
    self.e_y = self.encode_y(self.images, weights=self.weights4, biases=self.biases4)
    # Changing y : + 1 or +2 or +3.  tf.argmax replaces the deprecated
    # tf.arg_max alias (same semantics).  Note one_hot depth is 10, so an
    # index pushed past 9 yields an all-zero row.
    self.e_y = tf.one_hot(tf.argmax(self.e_y, 1) + self.extend_value, 10)
    self.fake_images = self.generate(self.e_z, self.e_y, weights=self.weights1, biases=self.biases1)
    t_vars = tf.trainable_variables()
    self.g_vars = [var for var in t_vars if 'gen' in var.name]
    self.enz_vars = [var for var in t_vars if 'enz' in var.name]
    self.eny_vars = [var for var in t_vars if 'eny' in var.name]
    self.saver = tf.train.Saver(self.g_vars)
    self.saver_z = tf.train.Saver(self.g_vars + self.enz_vars)
    self.saver_y = tf.train.Saver(self.eny_vars)
#do train
def get_training_tensors(self, learning_rate=0.001, grad_clip=5):
    """Create the cross-entropy loss and a gradient-clipped Adam train op.

    Args:
        learning_rate: Adam learning rate.
        grad_clip: global-norm clipping threshold.

    Returns:
        (loss tensor, optimizer op).
    """
    # Loss: softmax cross-entropy against one-hot encoded targets,
    # reshaped to line up with the logits.
    with tf.name_scope('targets-encode'):
        one_hot = tf.one_hot(self.targets, self.n_classes)
        encoded = tf.reshape(one_hot, self.logits.get_shape())
    with tf.name_scope('loss'):
        xent = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                       labels=encoded)
        loss = tf.reduce_mean(xent)
        tf.summary.scalar('loss', loss)
    # Optimizer: Adam with global-norm gradient clipping.
    with tf.name_scope('optimizer'):
        tvars = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                            grad_clip)
        optimizer = tf.train.AdamOptimizer(learning_rate).apply_gradients(
            zip(clipped, tvars))
    return loss, optimizer
def get_optimizer(self, learning_rate=0.001, grad_clip=5):
    """Build an MSE loss and a gradient-clipped Adam optimizer.

    Args:
        learning_rate: Adam learning rate.
        grad_clip: global-norm clipping threshold.

    Returns:
        (optimizer op, loss tensor).
    """
    # Loss: mean squared error between target and network output.
    with tf.variable_scope('loss'):
        mse = tf.losses.mean_squared_error(self.target, self.output)
    # Optimizer: Adam over all trainables with clipped gradients.
    with tf.variable_scope('optimizer'):
        params = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(tf.gradients(mse, params),
                                            grad_clip)
        adam = tf.train.AdamOptimizer(learning_rate)
        train_step = adam.apply_gradients(zip(clipped, params))
    return train_step, mse
def trainable_variables_on_device(self, rel_device_num, abs_device_num,
                                  writable):
    """Return the set of trainable variables on the specified device.

    Args:
        rel_device_num: local worker device index.
        abs_device_num: global graph device index (unused here).
        writable: whether the returned variables are writable or read-only.

    Returns:
        The set of trainable variables on the specified device.
    """
    del abs_device_num
    refs = tf.trainable_variables()
    if writable:
        return refs
    # Read-only access: swap each variable reference for the corresponding
    # staging-area "get" op registered for this device.
    staging = self.variable_mgr.staging_vars_on_devices[rel_device_num]
    return [staging[ref.name.split(':')[0]][1] for ref in refs]
def trainable_variables_on_device(self,
                                  rel_device_num,
                                  abs_device_num,
                                  writable=False):
    """Return the set of trainable variables on device.

    Args:
        rel_device_num: local worker device index (unused).
        abs_device_num: global graph device index.
        writable: whether to get a reference to the underlying variable
            (unused).

    Returns:
        The set of trainable variables on the specified device.
    """
    del rel_device_num, writable
    everything = tf.trainable_variables()
    if not self.each_tower_has_variables():
        return everything
    # Per-tower variables are namespaced as 'v<abs_device_num>/...'.
    prefix = 'v%s/' % abs_device_num
    return [v for v in everything if v.name.startswith(prefix)]
def savable_variables(self):
    """Returns a list/dict of savable variables to pass to tf.train.Saver."""
    params = {}
    for v in tf.global_variables():
        assert (v.name.startswith(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/v0/')
                or v.name in ('global_step:0', 'loss_scale:0',
                              'loss_scale_normal_steps:0')), (
                    'Invalid global variable: %s' % v)
        # We store variables in the checkpoint with the shadow variable prefix
        # removed so we can evaluate checkpoints in non-distributed replicated
        # mode. The checkpoints can also be loaded for training in
        # distributed_replicated mode.
        name = self._strip_port(self._remove_shadow_var_prefix_if_present(v.name))
        params[name] = v
    # PERF: hoisted out of the loop below -- the original called
    # tf.trainable_variables() once per local variable, which is quadratic
    # in the number of variables.
    trainable = set(tf.trainable_variables())
    for v in tf.local_variables():
        # Non-trainable variables, such as batch norm moving averages, do not
        # have corresponding global shadow variables, so we add them here.
        # Trainable local variables have corresponding global shadow variables,
        # which were added in the global variable loop above.
        if v.name.startswith('v0/') and v not in trainable:
            params[self._strip_port(v.name)] = v
    return params
def _optimize(self):
    """Build the generator update op and its global step.

    NOTE: The author said that there was no need for 100 d_iter per 100
    iters. https://github.com/igul222/improved_wgan_training/issues/3

    Returns:
        dict with 'g' (the minimize op) and 'global_step' (the counter).
    """
    step = tf.Variable(0, name='global_step')
    hp = self.arch['training']
    adam = tf.train.AdamOptimizer(hp['lr'], hp['beta1'], hp['beta2'])
    trainables = tf.trainable_variables()
    with tf.name_scope('Update'):
        g_step = adam.minimize(self.loss['G'], var_list=trainables,
                               global_step=step)
    return {
        'g': g_step,
        'global_step': step,
    }
# Source file: a2_transformer_classification.py
# Project: text_classification (author: brightmart)
# Scraped page metadata: 31 reads, 0 favorites, 0 likes, 0 comments
def loss(self, l2_lambda=0.0001):  # 0.001
    """Sparse softmax cross-entropy averaged over the batch, plus an L2
    penalty over every trainable variable except biases and alphas."""
    with tf.name_scope("loss"):
        # logits: [batch_size, num_classes]; labels: [batch_size].
        # Produces one cross-entropy value per example, shape (batch_size,).
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.input_y_label, logits=self.logits)
        data_loss = tf.reduce_mean(per_example)  # scalar
        regularizer = l2_lambda * tf.add_n(
            [tf.nn.l2_loss(v) for v in tf.trainable_variables()
             if ('bias' not in v.name) and ('alpha' not in v.name)])
        total = data_loss + regularizer
    return total
#def loss_seq2seq(self):
# with tf.variable_scope("loss"):
# losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label, logits=self.logits);#losses:[batch_size,self.decoder_sent_length]
# loss_batch=tf.reduce_sum(losses,axis=1)/self.decoder_sent_length #loss_batch:[batch_size]
# loss=tf.reduce_mean(loss_batch)
# l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * self.l2_lambda
# loss = loss + l2_losses
# return loss
def loss_nce(self,l2_lambda=0.0001): #0.0001-->0.001
    """calculate loss using (NCE)cross entropy here"""
    # Compute the average NCE loss for the batch.
    # tf.nce_loss automatically draws a new sample of the negative labels each
    # time we evaluate the loss.
    if self.is_training: #training
        #labels=tf.reshape(self.input_y,[-1]) #[batch_size,1]------>[batch_size,]
        labels=tf.expand_dims(self.input_y,1) #[batch_size,]----->[batch_size,1]
        loss = tf.reduce_mean( #inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
            tf.nn.nce_loss(weights=tf.transpose(self.W_projection),#[hidden_size*2, num_classes]--->[num_classes,hidden_size*2]. nce_weights:A `Tensor` of shape `[num_classes, dim].O.K.
                biases=self.b_projection, #[label_size]. nce_biases:A `Tensor` of shape `[num_classes]`.
                labels=labels, #[batch_size,1]. train_labels, # A `Tensor` of type `int64` and shape `[batch_size,num_true]`. The target classes.
                inputs=self.output_rnn_last,# [batch_size,hidden_size*2] #A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
                num_sampled=self.num_sampled, #scalar. 100
                num_classes=self.num_classes,partition_strategy="div")) #scalar. 1999
    # NOTE(review): the flattened indentation of this snippet makes it unclear
    # whether the L2 term below originally sat inside the `if` above; as written
    # here, `loss` is unbound when is_training is False.  Verify against the
    # original project before relying on the inference path.
    l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
    loss = loss + l2_losses
    return loss
def build_optim(self, loss):
    """Group gradient application and moving-average updates into one
    train op.

    Args:
        loss: unused here; the per-tower gradients were already computed
            and stashed in self.tower_grads.
    """
    # Averaging the tower gradients is the synchronization point across
    # all towers.
    mean_grads = self.average_gradients(self.tower_grads)
    # Apply the averaged gradients to the shared variables.
    apply_op = self.opt.apply_gradients(mean_grads,
                                        global_step=self.global_step)
    # Track the moving averages of all trainable variables.
    ema = tf.train.ExponentialMovingAverage(0.999, self.global_step)
    ema_op = ema.apply(tf.trainable_variables())
    # Group all updates into a single train op.
    return tf.group(apply_op, ema_op)
def prepare_trainer(self, generator_loss, discriminator_loss):
    """Helper function for init_opt: build Adam trainers for the generator
    ('g_' variables) and discriminator ('d_' variables)."""
    trainables = tf.trainable_variables()
    g_vars = [v for v in trainables if v.name.startswith('g_')]
    d_vars = [v for v in trainables if v.name.startswith('d_')]

    self.generator_trainer = pt.apply_optimizer(
        tf.train.AdamOptimizer(self.generator_lr, beta1=0.5),
        losses=[generator_loss],
        var_list=g_vars)
    self.discriminator_trainer = pt.apply_optimizer(
        tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5),
        losses=[discriminator_loss],
        var_list=d_vars)

    # Record both learning rates for logging.
    self.log_vars.append(("g_learning_rate", self.generator_lr))
    self.log_vars.append(("d_learning_rate", self.discriminator_lr))
def build_model(self, sess):
    """Initialize variables and optionally restore generator/discriminator
    weights from self.model_path.

    Returns:
        The training counter parsed from the checkpoint filename
        (presumably '..._<counter>.<ext>' -- verify against the saving
        code), or 0 for a fresh model.
    """
    self.init_opt()
    # tf.initialize_all_variables() was deprecated in TF 0.12;
    # tf.global_variables_initializer() is the drop-in replacement.
    sess.run(tf.global_variables_initializer())
    if len(self.model_path) > 0:
        print("Reading model parameters from %s" % self.model_path)
        # Restore only the generator ('g_') and discriminator ('d_') vars.
        restore_vars = [var for var in tf.trainable_variables()
                        if var.name.startswith('g_') or var.name.startswith('d_')]
        saver = tf.train.Saver(restore_vars)
        saver.restore(sess, self.model_path)
        # Pull the step counter out of the checkpoint filename.
        istart = self.model_path.rfind('_') + 1
        iend = self.model_path.rfind('.')
        counter = int(self.model_path[istart:iend])
    else:
        print("Created model with fresh parameters.")
        counter = 0
    return counter
# Source file: model.py
# Project: Saliency_Detection_Convolutional_Autoencoder (author: arthurmeyer)
# Scraped page metadata: 29 reads, 0 favorites, 0 likes, 0 comments
def train(self, loss, global_step):
    """
    Return a training step for the tensorflow graph

    Args:
        loss : loss to do sgd on
        global_step : which step are we at
    """
    adam = tf.train.AdamOptimizer(self.learning_rate)
    apply_op = adam.apply_gradients(adam.compute_gradients(loss),
                                    global_step=global_step)
    # Shadow every trainable variable with an exponential moving average.
    ema = tf.train.ExponentialMovingAverage(self.moving_avg_decay, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    # The returned op is a no-op that merely depends on both updates.
    with tf.control_dependencies([apply_op, ema_op]):
        return tf.no_op(name='train')
def load_trainable_vars(sess, filename):
    """load a .npz archive and assign the value of each loaded
    ndarray to the trainable variable whose name matches the
    archive key. Any elements in the archive that do not have
    a corresponding trainable variable will be returned in a dict.
    """
    leftovers = {}
    try:
        by_name = {str(v.name): v for v in tf.trainable_variables()}
        for key, value in np.load(filename).items():
            if key not in by_name:
                leftovers[key] = value
                continue
            print('restoring ' + key)
            sess.run(tf.assign(by_name[key], value))
    except IOError:
        # Missing archive: nothing restored, nothing leftover.
        pass
    return leftovers
def get_trainable_variables(trainable_scopes):
    """Returns a list of variables to train.

    Returns:
        A list of variables to train by the optimizer; all trainables when
        no scopes are given.
    """
    if trainable_scopes is None:
        return tf.trainable_variables()
    selected = []
    for scope in (s.strip() for s in trainable_scopes):
        selected.extend(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope))
    return selected
def build_model(self):
    """Instantiate the configured model class, an RMSProp optimizer with
    global-norm gradient clipping, and a saver over all global variables."""
    vocab = self.vocab_processor._mapping
    self.model = classmap[FLAGS.model_type](
        hidden_size=FLAGS.hidden,
        vocab_size=self.vocab_size,
        encoder_in_size=self.data.feats.shape[-1],
        encoder_in_length=self.data.feats.shape[1],
        decoder_in_length=self.data.decoder_in.shape[-1] - 1,
        word2vec_weight=self.w2v_W,
        embedding_size=FLAGS.embedding_dim,
        neg_sample_num=self.sample_num,
        start_id=vocab['<BOS>'],
        end_id=vocab['<EOS>'],
        Bk=FLAGS.K)
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.optimizer = tf.train.RMSPropOptimizer(FLAGS.lr)
    params = tf.trainable_variables()
    # Clip gradients to a global norm of 5 before applying.
    clipped, _ = tf.clip_by_global_norm(
        tf.gradients(self.model.cost, params), 5)
    self.updates = self.optimizer.apply_gradients(
        zip(clipped, params), global_step=self.global_step)
    self.saver = tf.train.Saver(tf.global_variables())
def get_number_of_trainable_parameters():
    """ use default graph """
    # https://stackoverflow.com/questions/38160940/ ...
    LOGGER.debug('Now compute total number of trainable params...')
    total = 0
    for var in tf.trainable_variables():
        shape = var.get_shape()
        # Product of the variable's static dimensions.
        n_params = 1
        for dim in shape:
            n_params *= dim.value
        LOGGER.debug(' layer name = {}, shape = {}, n_params = {}'.format(
            var.name, shape, n_params
        ))
        total += n_params
    LOGGER.debug('Total parameters = %d' % total)
    return total
def get_number_of_trainable_parameters():
    """ use default graph """
    # https://stackoverflow.com/questions/38160940/ ...

    def _size_of(shape):
        # Product of one variable's static dimensions.
        count = 1
        for dim in shape:
            count *= dim.value
        return count

    LOGGER.debug('Now compute total number of trainable params...')
    total_parameters = 0
    for variable in tf.trainable_variables():
        shape = variable.get_shape()
        variable_parameters = _size_of(shape)
        LOGGER.debug(' layer name = {}, shape = {}, n_params = {}'.format(
            variable.name, shape, variable_parameters
        ))
        total_parameters += variable_parameters
    LOGGER.debug('Total parameters = %d' % total_parameters)
    return total_parameters
def testCreateLogisticClassifier(self):
    """Single-clone logistic classifier: 2 variables, no update ops, one
    gradient per trainable variable, variables pinned to CPU."""
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        deploy_config = model_deploy.DeploymentConfig(num_clones=1)

        self.assertEqual(slim.get_variables(), [])
        clones = model_deploy.create_clones(deploy_config, LogisticClassifier,
                                            (inputs, labels))
        self.assertEqual(len(slim.get_variables()), 2)
        self.assertEqual(tf.get_collection(tf.GraphKeys.UPDATE_OPS), [])

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                  optimizer)
        self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
        self.assertEqual(total_loss.op.name, 'total_loss')
        for grad, var in grads_and_vars:
            self.assertDeviceEqual(grad.device, '')
            self.assertDeviceEqual(var.device, 'CPU:0')
def testCreateSingleclone(self):
    """Single-clone batch-norm classifier: 5 variables, 2 update ops, one
    gradient per trainable variable, variables pinned to CPU."""
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
        tf_labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=1)

        self.assertEqual(slim.get_variables(), [])
        clones = model_deploy.create_clones(config, BatchNormClassifier,
                                            (tf_inputs, tf_labels))
        self.assertEqual(len(slim.get_variables()), 5)
        self.assertEqual(len(tf.get_collection(tf.GraphKeys.UPDATE_OPS)), 2)

        sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        total_loss, grads_and_vars = model_deploy.optimize_clones(clones, sgd)
        self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
        self.assertEqual(total_loss.op.name, 'total_loss')
        for grad, var in grads_and_vars:
            self.assertDeviceEqual(grad.device, '')
            self.assertDeviceEqual(var.device, 'CPU:0')
def get_update_op(self, loss, opts, global_step=None, max_gradient_norm=None, freeze_variables=None):
    """Build one apply-gradients op per optimizer in `opts`.

    Variables whose names match any regex in `freeze_variables` receive no
    gradient; gradients may optionally be clipped by global norm.

    Returns:
        A list of update ops (one per optimizer), or None when loss is None.
    """
    if loss is None:
        return None
    patterns = freeze_variables or []

    def _is_frozen(variable):
        # A variable is frozen when any pattern matches its name.
        return any(re.match(p, variable.name) for p in patterns)

    # Compute gradients only for variables that are not frozen.
    params = [v for v in tf.trainable_variables() if not _is_frozen(v)]
    self.params = params

    gradients = tf.gradients(loss, params)
    if max_gradient_norm:
        gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)

    scope_name = 'gradients' if self.name is None else 'gradients_{}'.format(self.name)
    update_ops = []
    for opt in opts:
        with tf.variable_scope(scope_name):
            update_ops.append(
                opt.apply_gradients(list(zip(gradients, params)),
                                    global_step=global_step))
    return update_ops
def _create_optimizer(self):
    """Create per-bucket SGD train ops with global-norm gradient clipping.

    In forward-only mode just the global step is created.
    """
    print('Create optimizer... ')
    with tf.variable_scope('training'):
        self.global_step = tf.Variable(
            0, dtype=tf.int32, trainable=False, name='global_step')
        if self.fw_only:
            return  # inference-only graph needs no optimizer
        self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
        params = tf.trainable_variables()
        self.gradient_norms = []
        self.train_ops = []
        start = time.time()
        for bucket_id in range(len(config.BUCKETS)):
            grads, norm = tf.clip_by_global_norm(
                tf.gradients(self.losses[bucket_id], params),
                config.MAX_GRAD_NORM)
            self.gradient_norms.append(norm)
            self.train_ops.append(self.optimizer.apply_gradients(
                zip(grads, params),
                global_step=self.global_step))
            print('Creating opt for bucket {:d} took {:.2f} seconds.'.format(
                bucket_id, time.time() - start))
            start = time.time()
def build_model(self):
    """Placeholders for inputs / target images, an upscaled copy of the
    inputs, the generator, and its L2 reconstruction loss."""
    self.inputs = tf.placeholder(
        tf.float32,
        [self.batch_size, self.input_size, self.input_size, 3],
        name='real_images')
    target_hw = [self.image_shape[0], self.image_shape[1]]
    try:
        # Older tensorflow took height and width as separate arguments.
        self.up_inputs = tf.image.resize_images(
            self.inputs, target_hw[0], target_hw[1],
            tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    except ValueError:
        # newer versions of tensorflow
        self.up_inputs = tf.image.resize_images(
            self.inputs, target_hw, tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    self.images = tf.placeholder(
        tf.float32, [self.batch_size] + self.image_shape, name='real_images')
    self.sample_images = tf.placeholder(
        tf.float32, [self.sample_size] + self.image_shape,
        name='sample_images')
    self.G = self.generator(self.inputs)
    self.G_sum = tf.image_summary("G", self.G)
    self.g_loss = tf.reduce_mean(tf.square(self.images - self.G))
    self.g_loss_sum = tf.scalar_summary("g_loss", self.g_loss)
    self.g_vars = [v for v in tf.trainable_variables() if 'g_' in v.name]
    self.saver = tf.train.Saver()
def _flatgrad(self, loss, var_list):
    """ A Tensorflow version of John Schulman's `flatgrad` function. It
    computes the gradients but does NOT apply them (for now).

    This is only called during the `init` of the TRPO graph, so I think it's
    OK. Otherwise, wouldn't it be constantly rebuilding the computational
    graph? Or doing something else? Eh, for now I think it's OK.

    Params:
        loss: The loss function we're optimizing, which I assume is always
            scalar-valued.
        var_list: The list of variables (from `tf.trainable_variables()`) to
            take gradients. This should only be for the policynets.

    Returns:
        A single flat vector with all gradients concatenated.
    """
    flat_pieces = [tf.reshape(grad, [-1])
                   for grad in tf.gradients(loss, var_list)]
    return tf.concat(flat_pieces, axis=0)
def init_optimizer(self):
    """Pick the optimizer named by self.optimizer (defaulting to plain SGD)
    and build the clipped-gradient update op."""
    print("setting optimizer..")
    # Gradients and SGD update operation for training the model.
    optimizer_classes = {
        'adadelta': tf.train.AdadeltaOptimizer,
        'adam': tf.train.AdamOptimizer,
        'rmsprop': tf.train.RMSPropOptimizer,
    }
    opt_cls = optimizer_classes.get(self.optimizer.lower(),
                                    tf.train.GradientDescentOptimizer)
    self.opt = opt_cls(learning_rate=self.learning_rate)

    params = tf.trainable_variables()
    # Gradients of the loss w.r.t. all trainables, clipped by global norm.
    raw_grads = tf.gradients(self.loss, params)
    clipped, _ = tf.clip_by_global_norm(raw_grads, self.max_gradient_norm)
    # Update the model.
    self.updates = self.opt.apply_gradients(
        zip(clipped, params), global_step=self.global_step)