import numpy as np
import tensorflow as tf


def omniglot():
    sess = tf.InteractiveSession()
""" def wrapper(v):
return tf.Print(v, [v], message="Printing v")
v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
temp = wrapper(v)
#with tf.control_dependencies([temp]):
temp.eval()
print('Hello')"""
def update_tensor(V, dim2, val):  # Update tensor V at index (:, dim2[:]) with val[:]
    val = tf.cast(val, V.dtype)

    def body(_, elems):
        v, d2, chg = elems  # unpack (row, column index, new value)
        d2_int = tf.cast(d2, tf.int32)
        # Rebuild the row with the element at position d2 replaced by chg.
        return tf.slice(tf.concat([v[:d2_int], [chg], v[d2_int + 1:]], axis=0),
                        [0], [v.get_shape().as_list()[0]])

    Z = tf.scan(body, elems=(V, dim2, val),
                initializer=tf.constant(1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
                name="Scan_Update")
    return Z
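# A minimal usage sketch of update_tensor (assumes TF 1.x and the imports above;
# shapes and values are illustrative only): replace column dim2[i] of row i of V with val[i].
V = tf.zeros([3, 4], dtype=tf.float32)
dim2 = tf.constant([0, 2, 3], dtype=tf.float32)   # per-row column index
val = tf.constant([7.0, 8.0, 9.0])
Z = update_tensor(V, dim2, val)
Z = tf.Print(Z, [Z], message='updated rows: ', summarize=12)
with tf.Session() as sess:
    print(sess.run(Z))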
def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32):
"""Decode a JPEG string into one 3-D float image Tensor.
Args:
image_buffer: scalar string Tensor.
scope: Optional scope for op_scope.
Returns:
3-D float Tensor with values ranging from [0, 1).
"""
# with tf.op_scope([image_buffer], scope, 'decode_jpeg'):
# with tf.name_scope(scope, 'decode_jpeg', [image_buffer]):
with tf.name_scope(scope or 'decode_jpeg'):
# Decode the string as an RGB JPEG.
# Note that the resulting image contains an unknown height and width
# that is set dynamically by decode_jpeg. In other words, the height
# and width of image is unknown at compile-time.
image = tf.image.decode_jpeg(image_buffer, channels=3,
fancy_upscaling=False,
dct_method='INTEGER_FAST')
# image = tf.Print(image, [tf.shape(image)], 'Image shape: ')
return image
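# Usage sketch for decode_jpeg (assumes TF 1.x; 'example.jpg' is a hypothetical path),
# re-enabling the commented-out tf.Print to show the dynamic image shape.
jpeg_bytes = tf.read_file('example.jpg')
image = decode_jpeg(jpeg_bytes)
image = tf.Print(image, [tf.shape(image)], message='Image shape: ')
with tf.Session() as sess:
    decoded = sess.run(image)   # prints something like [480 640 3] to stderr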
def init_var(self):
self.rand_h = tf.random_uniform([1], 1.0 - float(self.rnd_hflip), 1.0)
self.rand_v = tf.random_uniform([1], 1.0 - float(self.rnd_vflip), 1.0)
self.rand_t = tf.random_uniform(
[1], 1.0 - float(self.rnd_transpose), 1.0)
self.offset = tf.random_uniform(
[2], dtype='int32', maxval=self.padding * 2 + self.shrink)
if self._debug:
self.offset = tf.Print(self.offset,
['Forward RND module', self.offset])
if self.rnd_size:
self.space = 2 * self.padding - self.offset
self.offset20 = tf.random_uniform(
[], dtype='int32', maxval=self.space[0] * 2) - self.space[0]
self.offset21 = tf.random_uniform(
[], dtype='int32', maxval=self.space[1] * 2) - self.space[1]
        self.offset2 = tf.stack([self.offset20, self.offset21])
else:
self.offset2 = tf.zeros([2], dtype='int32')
pass
def test_vgg():
vgg = Vgg16()
image_tensor = tf.placeholder(tf.float32)
with tf.Session() as sess:
vgg.build(image_tensor)
init = tf.initialize_all_variables()
sess.run(init)
load_feature_layer_params('/Users/dtong/code/data/tf-image-interpreter/pretrain/vgg16_weights.npz', sess)
for v in tf.get_collection(tf.GraphKeys.VARIABLES):
print_op = tf.Print(v, [v], message=v.name, first_n=10)
sess.run(print_op)
roidb = RoiDb('val.txt', 2007)
batch_gen = BatchGenerator(roidb)
for i in range(10):
image, scale, bboxes = batch_gen.next_batch()
print(sess.run(vgg.conv5_3, feed_dict={image_tensor: image}))
def main():
roidb = RoiDb('val.txt', 2007)
batch_gen = BatchGenerator(roidb)
image_tensor = tf.placeholder(dtype=tf.float32)
scale_tensor = tf.placeholder(dtype=tf.float32)
bboxes_tensor = tf.placeholder(dtype=tf.float32)
p_op = tf.Print(image_tensor, [tf.shape(image_tensor), scale_tensor, bboxes_tensor])
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
coord = tf.train.Coordinator()
queue_threads = queue_runner.start_queue_runners(sess, coord=coord)
for i in range(10):
if coord.should_stop():
break
image, scale, bboxes = batch_gen.next_batch()
sess.run([p_op], feed_dict={image_tensor: image, scale_tensor: scale, bboxes_tensor:bboxes})
coord.request_stop()
coord.join(queue_threads)
def generate(self, image, scale, bboxes):
shape = tf.shape(image)
# TODO: NotImplementedError: Negative start indices are not currently supported
# height, width = shape[-2:]
# height, width = shape[-2:]
height = shape[1]
width = shape[2]
if self._debug:
height = tf.Print(height, [height], message='image height: ')
width = tf.Print(width, [width], message='image width: ')
anchors = self._generate_valid_anchors(width, height)
overlaps = self._calculate_overlaps(tf.cast(anchors, dtype=tf.float32), tf.cast(bboxes, dtype=tf.float32))
labels = self._generate_labels(overlaps)
labels = self._subsample_positive(labels)
labels = self._subsample_negative(labels)
return labels
def _generate_labels(self, overlaps):
labels = tf.Variable(tf.ones(shape=(tf.shape(overlaps)[0],), dtype=tf.float32) * -1, trainable=False,
validate_shape=False)
gt_max_overlaps = tf.arg_max(overlaps, dimension=0)
anchor_max_overlaps = tf.arg_max(overlaps, dimension=1)
mask = tf.one_hot(anchor_max_overlaps, tf.shape(overlaps)[1], on_value=True, off_value=False)
max_overlaps = tf.boolean_mask(overlaps, mask)
if self._debug:
max_overlaps = tf.Print(max_overlaps, [max_overlaps])
labels = tf.scatter_update(labels, gt_max_overlaps, tf.ones((tf.shape(gt_max_overlaps)[0],)))
# TODO: extract config object
over_threshold_mask = tf.reshape(tf.where(max_overlaps > 0.5), (-1,))
if self._debug:
over_threshold_mask = tf.Print(over_threshold_mask, [over_threshold_mask], message='over threshold index : ')
labels = tf.scatter_update(labels, over_threshold_mask, tf.ones((tf.shape(over_threshold_mask)[0],)))
# TODO: support clobber positive in the origin implement
below_threshold_mask = tf.reshape(tf.where(max_overlaps < 0.3), (-1,))
if self._debug:
below_threshold_mask = tf.Print(below_threshold_mask, [below_threshold_mask], message='below threshold index : ')
labels = tf.scatter_update(labels, below_threshold_mask, tf.zeros((tf.shape(below_threshold_mask)[0],)))
return labels
def bag_hinge_loss(config, preds, sent_mask, flip_sent_mask, hete_mask,
sent_trgt, sent_num):
""" HINGE LOSS:
DEFINED AS: MAX(0, M - MIN(SENT+) - MAX(SENT-))
THIS ONLY APPLIES TO HETE BAGS.
"""
flip_sent_trgt = \
tf.constant(1, shape=[config.batch_size,sent_num], dtype=config.data_type) - \
sent_trgt
pos_preds = preds + flip_sent_trgt + flip_sent_mask # [batch_size, sent_num]
neg_preds = preds * flip_sent_trgt * sent_mask # [batch_size, sent_num]
min_pos_pred = tf.reduce_min(pos_preds, 1)
# min_pos_pred = tf.Print(min_pos_pred, [min_pos_pred], message='min_pos_pred')
max_neg_pred = tf.reduce_max(neg_preds, 1)
# max_neg_pred = tf.Print(max_neg_pred, [max_neg_pred], message='max_neg_pred')
    hinge_loss = hete_mask * tf.reduce_max(tf.stack(
        [tf.constant(0, shape=[config.batch_size], dtype=config.data_type),
         (0.20 - min_pos_pred + max_neg_pred)], axis=1), 1)  # [batch_size]
# hinge_loss = tf.Print(hinge_loss, [hinge_loss], message='hinge_loss', summarize=20)
avg_hinge_loss = tf.reduce_sum(hinge_loss) / (tf.reduce_sum(hete_mask) + 1e-12)
return avg_hinge_loss
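# Quick NumPy check of the margin formula max(0, M - min(sent+) + max(sent-))
# with made-up scores and the same margin M = 0.20 used in the code above.
import numpy as np
pos = np.array([0.9, 0.6])   # predictions for the positive sentences of a hete bag
neg = np.array([0.3, 0.1])   # predictions for the negative sentences
loss = max(0.0, 0.20 - pos.min() + neg.max())
print(loss)   # 0.0: the worst positive beats the best negative by more than the margin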
def distorted_inputs():
"""Construct distorted input for CIFAR training using the Reader ops.
Returns:
images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
labels: Labels. 1D tensor of [batch_size] size.
Raises:
ValueError: If no data_dir
"""
with tf.variable_scope('distorted_inputs'):
if not FLAGS.train_data_dir:
raise ValueError('Please supply a data_dir')
data_dir = FLAGS.train_data_dir
images, labels, seq_lengths = ocr_input.distorted_inputs(data_dir=data_dir,
batch_size=FLAGS.batch_size)
# seq_lengths = tf.Print(seq_lengths, [seq_lengths], "seq_lengths")
return images, labels, seq_lengths
def create_ctc_loss(logits, labels, timesteps, label_seq_lengths):
with tf.variable_scope('CTC_Loss'):
print()
print("Labels shape")
print(labels)
print()
print("Logits shape")
print(logits)
print()
print("Labels len shape")
print(label_seq_lengths)
# logits = tf.Print(logits, [logits], "Logits")
ctc_loss = tf.nn.ctc_loss(labels,
logits,
timesteps)
cost = tf.reduce_mean(ctc_loss, name='ctc')
# The total loss is defined as the cross entropy loss plus all of the weight
# decay terms (L2 loss).
return cost
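# Usage sketch for create_ctc_loss (illustrative shapes, assumes TF 1.x):
# tf.nn.ctc_loss expects `labels` as an int32 SparseTensor and, by default,
# time-major `logits` of shape [max_time, batch_size, num_classes].
labels = tf.SparseTensor(indices=[[0, 0], [0, 1]], values=[3, 7], dense_shape=[1, 2])
logits = tf.random_normal([50, 1, 10])              # [max_time, batch, num_classes]
timesteps = tf.constant([50], dtype=tf.int32)       # usable time steps per batch element
label_lens = tf.constant([2], dtype=tf.int32)
cost = create_ctc_loss(logits, labels, timesteps, label_lens)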
def main(unused_argv):
if FLAGS.log_dir is None or FLAGS.log_dir == "":
raise ValueError("Must specify an explicit `log_dir`")
if FLAGS.data_dir is None or FLAGS.data_dir == "":
raise ValueError("Must specify an explicit `data_dir`")
device, target = device_and_target()
with tf.device(device):
images = tf.placeholder(tf.float32, [None, 784], name='image_input')
labels = tf.placeholder(tf.float32, [None], name='label_input')
data = read_data_sets(FLAGS.data_dir,
one_hot=False,
fake_data=False)
logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)
loss = mnist.loss(logits, labels)
loss = tf.Print(loss, [loss], message="Loss = ")
train_op = mnist.training(loss, FLAGS.learning_rate)
with tf.train.MonitoredTrainingSession(
master=target,
is_chief=(FLAGS.task_index == 0),
checkpoint_dir=FLAGS.log_dir) as sess:
while not sess.should_stop():
xs, ys = data.train.next_batch(FLAGS.batch_size, fake_data=False)
sess.run(train_op, feed_dict={images:xs, labels:ys})
def tf_debug_gradient(x, y, verbose=True):
"""
Print the theoretical and numeric gradients, and the absolute difference between the two
Args:
x (tf.Variable): input variable
y (tf.Variable): output variable
verbose: switch display of information
Returns:
the theoretical and numeric gradient
"""
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
if verbose:
print(y.eval())
gt, gn = tf.test.compute_gradient(
x, [d.value for d in x.get_shape()], y, [d.value for d in y.get_shape()], delta=1e-2)
if verbose:
print(np.concatenate((gt, gn, np.round(np.abs(gt-gn),2)), len(gt.shape) - 1))
print(y.eval())
return gt, gn
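# Usage sketch for tf_debug_gradient (illustrative shapes; assumes TF 1.x, whose
# tf.test.compute_gradient returns the theoretical and numeric Jacobians).
x = tf.Variable(tf.random_normal([3, 2]), name='x')
y = tf.reduce_sum(tf.square(x))
gt, gn = tf_debug_gradient(x, y, verbose=False)
print(gt.shape, gn.shape)   # each of shape (x_size, y_size), here (6, 1)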
def __match_with_labels(self,gt_anchor_labels,gt_anchor_bboxes,gt_anchor_scores,jaccard,matching_threshold,gt_labels,gt_bboxes,num_anchors):
#debugging info
#jaccard = tf.Print(jaccard, [gt_labels], "gt_labels")
#match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5).
    mask = tf.reduce_max(jaccard, axis=0) > matching_threshold
    mask_inds = tf.argmax(jaccard, axis=0)
matched_labels = tf.gather(gt_labels, mask_inds)
gt_anchor_labels = tf.where(mask, matched_labels, gt_anchor_labels)
gt_anchor_bboxes = tf.where(mask, tf.gather(gt_bboxes, mask_inds),gt_anchor_bboxes)
gt_anchor_scores = tf.reduce_max(jaccard, axis= 0)
#matching each ground truth box to the default box with the best jaccard overlap
use_no_miss = True
if use_no_miss:
gt_anchor_labels,gt_anchor_bboxes,gt_anchor_scores = self.__match_no_miss(gt_anchor_labels, \
gt_anchor_bboxes, gt_anchor_scores, jaccard, \
gt_labels, gt_bboxes, num_anchors)
return gt_anchor_labels,gt_anchor_bboxes,gt_anchor_scores
def mixture_loss(pred, y, n_mixtures, batch_size):
pred = tf.verify_tensor_all_finite(pred, "Pred not finite!")
out_pi, out_sigma, out_mu, out_rho = splitMix(pred, n_mixtures, batch_size)
result_binorm, result_delta = tf_bivariate_normal(y, out_mu, out_sigma, out_rho, n_mixtures, batch_size)
result_binorm = tf.verify_tensor_all_finite(result_binorm, "Result not finite1!")
    result_weighted = tf.multiply(result_binorm, out_pi)
result_weighted = tf.verify_tensor_all_finite(result_weighted, "Result not finite2!")
result_raw = tf.reduce_sum(result_weighted + epsilon, 1, keep_dims=True)
result_raw = tf.Print(result_raw, [tf.reduce_sum(result_raw)], "Sum of weighted density. If zero, sigma is too small: ")
result_raw = tf.Print(result_raw, [tf.reduce_max(result_raw)], "Max of weighted density. If zero, sigma is too small: ")
result_raw = tf.verify_tensor_all_finite(result_raw, "Result not finite3!")
result = -tf.log(result_raw + e)
result = tf.verify_tensor_all_finite(result, "Result not finite4!")
result = tf.reduce_sum(result)
result = tf.verify_tensor_all_finite(result, "Result not finite5!")
return result
# Returns the LSTM stack created based on the parameters.
# Processes several batches at once.
# Input shape is: (parameters['batch_size'], parameters['n_steps'], parameters['n_input'])
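# The comment above refers to an LSTM-stack helper whose body is not shown in this
# excerpt; a hypothetical sketch under those assumptions (parameter names are illustrative):
def lstm_stack(x, parameters):
    # x: [parameters['batch_size'], parameters['n_steps'], parameters['n_input']]
    cells = [tf.nn.rnn_cell.BasicLSTMCell(parameters['n_hidden'])
             for _ in range(parameters['n_layers'])]
    stacked = tf.nn.rnn_cell.MultiRNNCell(cells)
    outputs, state = tf.nn.dynamic_rnn(stacked, x, dtype=tf.float32)
    return outputs, state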
def test_tabular_UCB(self):
nb_states = 3
nb_actions = 2
with tf.Graph().as_default():
tf.set_random_seed(1)
inputs_t = tf.random_uniform(shape=[1], minval=0, maxval=3, dtype=tf.int32)
# inputs_t = tf.Print(inputs_t, data=[inputs_t], message='inputs_t')
Qs = tf.ones([nb_states, nb_actions], dtype=tf.float32)
# Qs = tf.Print(Qs, data=[Qs], message='Qs', summarize=12)
actions_t, probs = capacities.tabular_UCB(Qs, inputs_t)
# actions_t = tf.Print(actions_t, data=[timestep, actions_t], message='actions_t')
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
inputs, actions = sess.run([inputs_t, actions_t])
inputs, actions = sess.run([inputs_t, actions_t])
inputs, actions = sess.run([inputs_t, actions_t])
self.assertEqual(np.array_equal(inputs, [ 0 ]), True)
self.assertEqual(np.array_equal(actions, [ 0 ]), True)
def _normalize(self, x, mean, mean_sq, message):
# make sure this is called with a variable scope
shape = x.get_shape().as_list()
assert len(shape) == 4
self.gamma_driver = tf.get_variable("gamma_driver", [shape[-1]],
initializer=tf.random_normal_initializer(0., 0.02))
gamma = tf.exp(self.gamma_driver)
gamma = tf.reshape(gamma, [1, 1, 1, -1])
self.beta = tf.get_variable("beta", [shape[-1]],
initializer=tf.constant_initializer(0.))
beta = tf.reshape(self.beta, [1, 1, 1, -1])
assert self.epsilon is not None
assert mean_sq is not None
assert mean is not None
std = tf.sqrt(self.epsilon + mean_sq - tf.square(mean))
out = x - mean
out = out / std
# out = tf.Print(out, [tf.reduce_mean(out, [0, 1, 2]),
# tf.reduce_mean(tf.square(out - tf.reduce_mean(out, [0, 1, 2], keep_dims=True)), [0, 1, 2])],
# message, first_n=-1)
out = out * gamma
out = out + beta
return out
def _normalize(self, x, mean, mean_sq, message):
# make sure this is called with a variable scope
shape = x.get_shape().as_list()
assert len(shape) == 4
self.gamma_driver = tf.get_variable("gamma_driver", shape[1:],
initializer=tf.random_normal_initializer(0., 0.02))
gamma = tf.exp(self.gamma_driver)
gamma = tf.expand_dims(gamma, 0)
self.beta = tf.get_variable("beta", shape[1:],
initializer=tf.constant_initializer(0.))
beta = tf.expand_dims(self.beta, 0)
assert self.epsilon is not None
assert mean_sq is not None
assert mean is not None
std = tf.sqrt(self.epsilon + mean_sq - tf.square(mean))
out = x - mean
out = out / std
# out = tf.Print(out, [tf.reduce_mean(out, [0, 1, 2]),
# tf.reduce_mean(tf.square(out - tf.reduce_mean(out, [0, 1, 2], keep_dims=True)), [0, 1, 2])],
# message, first_n=-1)
out = out * gamma
out = out + beta
return out
def _normalize(self, x, mean, mean_sq, message):
# make sure this is called with a variable scope
shape = x.get_shape().as_list()
assert len(shape) == 4
self.gamma = tf.get_variable("gamma", [shape[-1]],
initializer=tf.random_normal_initializer(1., 0.02))
gamma = tf.reshape(self.gamma, [1, 1, 1, -1])
self.beta = tf.get_variable("beta", [shape[-1]],
initializer=tf.constant_initializer(0.))
beta = tf.reshape(self.beta, [1, 1, 1, -1])
assert self.epsilon is not None
assert mean_sq is not None
assert mean is not None
std = tf.sqrt(self.epsilon + mean_sq - tf.square(mean))
out = x - mean
out = out / std
# out = tf.Print(out, [tf.reduce_mean(out, [0, 1, 2]),
# tf.reduce_mean(tf.square(out - tf.reduce_mean(out, [0, 1, 2], keep_dims=True)), [0, 1, 2])],
# message, first_n=-1)
out = out * gamma
out = out + beta
return out
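# All three _normalize variants above apply the same formula
# out = gamma * (x - mean) / sqrt(epsilon + mean_sq - mean^2) + beta;
# a standalone NumPy check with toy statistics (illustrative values only):
import numpy as np
x = np.random.randn(2, 4, 4, 3).astype(np.float32)
mean = x.mean(axis=0, keepdims=True)          # stand-ins for the externally supplied stats
mean_sq = (x ** 2).mean(axis=0, keepdims=True)
epsilon, gamma, beta = 1e-5, 1.0, 0.0         # stand-ins for the learned scale/shift
out = gamma * (x - mean) / np.sqrt(epsilon + mean_sq - mean ** 2) + beta
print(out.mean(), out.std())                  # roughly 0 and 1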
def _get_weight_vector(self, M, w_tm1, k, beta, g, s, gamma):
# M = tf.Print(M, [M, w_tm1, k], message='get weights beg1: ')
# M = tf.Print(M, [beta, g, s, gamma], message='get weights beg2: ')
# Content adressing, see Chapter 3.3.1:
num = beta * _cosine_distance(M, k)
w_c = K.softmax(num) # It turns out that equation (5) is just softmax.
# Location adressing, see Chapter 3.3.2:
# Equation 7:
w_g = (g * w_c) + (1-g)*w_tm1
# C_s is the circular convolution
#C_w = K.sum((self.C[None, :, :, :] * w_g[:, None, None, :]),axis=3)
# Equation 8:
# TODO: Explain
C_s = K.sum(K.repeat_elements(self.C[None, :, :, :], self.batch_size, axis=0) * s[:,:,None,None], axis=1)
w_tilda = K.batch_dot(C_s, w_g)
# Equation 9:
w_out = _renorm(w_tilda ** gamma)
return w_out
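# The circular convolution in Equation 8 is easier to read in plain NumPy
# (toy sizes, illustrative values): w_tilda(i) = sum_j w_g(j) * s(i - j), indices mod N.
import numpy as np
def circular_convolve(w_g, s):
    N = len(w_g)
    w_tilda = np.zeros(N)
    for i in range(N):
        for j in range(N):
            w_tilda[i] += w_g[j] * s[(i - j) % N]
    return w_tilda
w_g = np.array([0.1, 0.7, 0.1, 0.1])          # gated weighting over 4 memory slots
s = np.array([0.0, 0.9, 0.1, 0.0])            # shift distribution: mostly +1, a little +2
print(circular_convolve(w_g, s))              # the 0.7 mass moves about one slot forward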
def _fc_relu_layers(self, bottom, dim, name = None):
with tf.name_scope(name) as scope:
shape = int(np.prod(bottom.get_shape()[1:]))
weights = tf.Variable(tf.truncated_normal([shape, dim],
dtype=tf.float32, stddev=0.005), name='weights')
bias = tf.Variable(tf.constant(1.0, shape=[dim], dtype=tf.float32), name='biases')
bottom_flat = tf.reshape(bottom, [-1, shape])
fc_weights = tf.nn.bias_add(tf.matmul(bottom_flat, weights), bias)
self.parameters[name] = [weights, bias]
if not tf.get_variable_scope().reuse:
weight_decay = tf.multiply(tf.nn.l2_loss(weights), self.wd,
name='fc_relu_weight_loss')
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
weight_decay)
top = tf.nn.relu(fc_weights, name=scope)
_activation_summary(top)
top = tf.Print(top, [tf.shape(top)], message='Shape of %s' % name, first_n = 1, summarize=4)
return top
def _fc_layers(self, bottom, dim, name = None):
with tf.name_scope(name) as scope:
shape = int(np.prod(bottom.get_shape()[1:]))
weights = tf.Variable(tf.truncated_normal([shape, dim],
dtype=tf.float32, stddev=0.005), name='weights')
bias = tf.Variable(tf.constant(1.0, shape=[dim], dtype=tf.float32), name='biases')
bottom_flat = tf.reshape(bottom, [-1, shape])
top = tf.nn.bias_add(tf.matmul(bottom_flat, weights), bias, name=scope)
self.parameters[name] = [weights, bias]
if not tf.get_variable_scope().reuse:
weight_decay = tf.multiply(tf.nn.l2_loss(weights), self.wd,
name='fc_weight_loss')
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
weight_decay)
_activation_summary(top)
top = tf.Print(top, [tf.shape(top)], message='Shape of %s' % name, first_n = 1, summarize=4)
return top
def print_(var, name: str, first_n=10, summarize=5):
"""Util for debugging, by printing values of tf.Variable `var` during training"""
# name = (next(k for k, v in globals().items() if v == var) # get name automagically
# if name is None else name) # TODO make work for list ?
# name = (next(k for k, v in globals().items() if id(v) == id(var))
# if name is None else name)
# print(name)
# return ([k for k, v in globals().items() if id(v) == id(var)]
# if name is None else name)
try:
return tf.Print(var, [var], '{}: '.format(name), first_n=first_n,
summarize=summarize)
    except TypeError:  # variables are already in a list
        return tf.Print(var, var, '{}: '.format(name), first_n=first_n,
                        summarize=summarize)
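# Usage sketch for print_ (illustrative names, assumes TF 1.x): wrap a tensor so its
# first few values are logged to stderr during the first 10 session runs.
w = tf.Variable(tf.random_normal([5]), name='w')
w_logged = print_(w, 'w')
loss = tf.reduce_sum(tf.square(w_logged))     # use the wrapped tensor so the print op runs
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(loss)                            # emits "w: [...]"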
def apply_gradients(self, grads):
coldOptim = tf.train.MomentumOptimizer(
self._cold_lr * (1. - self._momentum), self._momentum)
def coldSGDstart():
sgd_step_op = tf.assign_add(self.sgd_step, 1)
coldOptim_op = coldOptim.apply_gradients(grads)
if KFAC_DEBUG:
with tf.control_dependencies([sgd_step_op, coldOptim_op]):
sgd_step_op = tf.Print(
sgd_step_op, [self.sgd_step, tf.convert_to_tensor('doing cold sgd step')])
return tf.group(*[sgd_step_op, coldOptim_op])
kfacOptim_op, qr = self.apply_gradients_kfac(grads)
def warmKFACstart():
return kfacOptim_op
return tf.cond(tf.greater(self.sgd_step, self._cold_iter), warmKFACstart, coldSGDstart), qr
def add_training_op(self):
    loss = self.total_loss
    opt1 = tf.train.AdagradOptimizer(self.config.lr)
    opt2 = tf.train.AdagradOptimizer(self.config.emb_lr)
    ts = tf.trainable_variables()
    gs = tf.gradients(loss, ts)
    gs_ts = zip(gs, ts)
    gt_emb, gt_nn = [], []
    for g, t in gs_ts:
        # print(t.name, g.name)
        if "Embed/embedding:0" in t.name:
            # g = tf.Print(g, [g.get_shape(), t.get_shape()])
            gt_emb.append((g, t))
            # print(t.name)
        else:
            gt_nn.append((g, t))
            # print(t.name)
    train_op1 = opt1.apply_gradients(gt_nn)
    train_op2 = opt2.apply_gradients(gt_emb)
    train_op = [train_op1, train_op2]
    return train_op
def bits_err_per_seq(out, expected, nsteps):
rel_pred = predict(out, nsteps)
rel_pred = tf.Print(
rel_pred,
[tf.slice(rel_pred, [0, 0, 0], [1, -1, 1])],
"predicted",
summarize=20,
)
expected = tf.Print(
expected,
[tf.slice(expected, [0, 0, 0], [1, -1, 1])],
"expected",
summarize=20,
)
diff = rel_pred - expected
return tf.reduce_mean(tf.reduce_sum(tf.abs(diff), [1, 2]))
def _get_optimizer(self, training_iters, global_step):
if self.optimizer == "momentum":
learning_rate = self.opt_kwargs.pop("learning_rate", 0.2)
decay_rate = self.opt_kwargs.pop("decay_rate", 0.95)
self.learning_rate_node = tf.train.exponential_decay(learning_rate=learning_rate,
global_step=global_step,
decay_steps=training_iters,
decay_rate=decay_rate,
staircase=True)
optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate_node, momentum=0.9,
**self.opt_kwargs).minimize(self.net.cost,
global_step=global_step)
# optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate_node, momentum=0.9,
# **self.opt_kwargs)
# gvs = optimizer.compute_gradients(self.net.cost)
# # [print(grad) for grad, var in gvs]
# tf.Print(self.net.cost,self.net.cost)
# capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
# train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step)
elif self.optimizer == "adam":
learning_rate = self.opt_kwargs.pop("learning_rate", 0.001)
self.learning_rate_node = tf.Variable(learning_rate)
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate_node,
**self.opt_kwargs).minimize(self.net.cost,
global_step=global_step)
return optimizer
def _filter_inside_anchors(self, all_anchors, height, width):
# filter anchors
inds_inside = tf.where(
(all_anchors[:, 0] > 0) &
(all_anchors[:, 1] > 0) &
(all_anchors[:, 2] < width) &
(all_anchors[:, 3] < height)
)
if self._debug:
inds_inside = tf.Print(inds_inside, [tf.shape(inds_inside)], message='inside anchors: ')
anchors = tf.gather(all_anchors, inds_inside)
return anchors
def _generate_all_anchors(self, shifts):
num_anchors = self._anchors.shape[0]
num_shifts = tf.shape(shifts)[0]
all_anchors = (self._anchors.reshape(1, num_anchors, 4) +
tf.transpose(tf.reshape(shifts, (1, num_shifts, 4)), perm=(1, 0, 2)))
all_anchors = tf.reshape(all_anchors, (num_shifts * num_anchors, 4))
    if self._debug:
        num_all_anchors = num_shifts * num_anchors
        # Attach the print to the returned tensor; otherwise the tf.Print op is never executed.
        all_anchors = tf.Print(all_anchors, [num_all_anchors], message='all anchor: ')
    return all_anchors