def calc_reward(outputs):
    # consider the action at the last time step
    outputs = outputs[-1]  # look at ONLY THE END of the sequence
    outputs = tf.reshape(outputs, (batch_size, cell_out_size))
    # get the baseline
    b = tf.pack(baselines)
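    # duplicate each per-glimpse baseline so there is one value per location coordinate (x, y)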
    b = tf.concat(2, [b, b])
    b = tf.reshape(b, (batch_size, (nGlimpses) * 2))
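    # block gradients through the baseline for the REINFORCE term; the baseline itself
    # is trained only by the squared-error term added to the objective below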
    no_grad_b = tf.stop_gradient(b)
    # get the action (classification)
    p_y = tf.nn.softmax(tf.matmul(outputs, Wa_h_a) + Ba_h_a)
    max_p_y = tf.arg_max(p_y, 1)
    correct_y = tf.cast(labels_placeholder, tf.int64)
    # reward for all examples in the batch
    R = tf.cast(tf.equal(max_p_y, correct_y), tf.float32)
    reward = tf.reduce_mean(R)  # mean reward
    R = tf.reshape(R, (batch_size, 1))
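    # broadcast each example's reward to every glimpse/location coordinate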
    R = tf.tile(R, [1, (nGlimpses) * 2])
    # probability of the sampled locations under the Gaussian location policy
    p_loc = gaussian_pdf(mean_locs, sampled_locs)
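    # squash the densities into (0, 1) before taking the log below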
    p_loc = tf.tanh(p_loc)
    p_loc_orig = p_loc
    p_loc = tf.reshape(p_loc, (batch_size, (nGlimpses) * 2))
    # define the cost function: classification log-likelihood plus the REINFORCE term
    # for the location policy, minus the baseline's squared error
    J = tf.concat(1, [tf.log(p_y + SMALL_NUM) * (onehot_labels_placeholder),
                      tf.log(p_loc + SMALL_NUM) * (R - no_grad_b)])
    J = tf.reduce_sum(J, 1)
    J = J - tf.reduce_sum(tf.square(R - b), 1)
    J = tf.reduce_mean(J, 0)
    cost = -J
    # define the optimizer
    optimizer = tf.train.MomentumOptimizer(lr, momentumValue)
    train_op = optimizer.minimize(cost, global_step)
    return cost, reward, max_p_y, correct_y, train_op, b, tf.reduce_mean(b), tf.reduce_mean(R - b), lr
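
For reference, here is a minimal sketch (not part of the original code) of how calc_reward might be driven from a training loop. The names images_placeholder, next_batch and max_steps are assumptions standing in for however the surrounding script builds its input pipeline; outputs is the RNN output list produced by the glimpse network earlier in the script.

cost, reward, pred_labels, true_labels, train_op, b, avg_b, rminusb, lr = calc_reward(outputs)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())  # old-TF initializer, matching the tf.pack / tf.concat(2, ...) era
    for step in range(max_steps):  # max_steps: hypothetical iteration budget
        # next_batch is a hypothetical helper returning images, integer labels and one-hot labels
        images, int_labels, onehot_labels = next_batch(batch_size)
        feed = {images_placeholder: images,
                labels_placeholder: int_labels,
                onehot_labels_placeholder: onehot_labels}
        _, cost_val, reward_val = sess.run([train_op, cost, reward], feed_dict=feed)
        if step % 100 == 0:
            print('step %d  cost %.4f  mean reward %.4f' % (step, cost_val, reward_val))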