def add_loss_op(self):
    # max_class = tf.transpose(tf.expand_dims(tf.arg_max(self.predicted_class, 2), axis=2), [1, 0, 2])
    # Predicted class from the final logits; shape [batch, 1].
    max_class = tf.expand_dims(tf.argmax(self.logits, axis=1), axis=1)
    true_labels = tf.cast(self.targets_placeholder, tf.int64)
    # Reward is 1.0 for a correct classification, 0.0 otherwise.
    rewards = tf.cast(tf.equal(max_class, true_labels), tf.float32)
    tot_cum_rewards = rewards
    # Broadcast the terminal reward to every glimpse step: [batch, seq_len].
    tiled_rewards = tf.tile(tot_cum_rewards, (1, self.config.seq_len))
    baselines = tf.squeeze(self.baselines, axis=2)
    # Advantage for REINFORCE; stop_gradient keeps the policy term from updating the baseline.
    stable_rewards = tiled_rewards - tf.stop_gradient(baselines)
    # Train the baseline to regress the reward; only the baseline network receives gradients from this term.
    baseline_mse = tf.reduce_mean(tf.square(tiled_rewards - baselines))
    # Supervised cross-entropy on the classification logits.
    self.cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=tf.squeeze(true_labels, axis=1)))
    # Log-likelihood of the sampled glimpse locations under the location policy
    # (Normal takes the standard deviation, although the config field is named variance).
    ll = tf.contrib.distributions.Normal(
        tf.stack(self.mean_loc), self.config.variance).log_pdf(tf.stack(self.sampled_loc))
    ll = tf.transpose(tf.reduce_sum(ll, axis=2))  # sum over (x, y) coords -> [batch, seq_len]
    # REINFORCE term: location log-likelihood weighted by the advantage.
    reward_loss = tf.reduce_mean(ll * stable_rewards, axis=[0, 1])
    # Hybrid loss: maximize the REINFORCE term, minimize baseline MSE and cross-entropy.
    self.loss = -reward_loss + baseline_mse + self.cross_entropy
    self.total_rewards = tf.reduce_mean(tot_cum_rewards)
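
The loss combines three terms: a REINFORCE policy-gradient term that weights the log-likelihood of the sampled glimpse locations by the baseline-subtracted reward, an MSE term that trains the reward baseline, and a standard cross-entropy term on the final classification. Below is a minimal NumPy sketch of the same arithmetic, not the model's actual code: the shapes and values (batch, seq_len, loc_dim, sigma) are hypothetical placeholders, and gradient bookkeeping such as stop_gradient is omitted.

import numpy as np

batch, seq_len, loc_dim = 2, 3, 2              # hypothetical sizes
rng = np.random.default_rng(0)

logits = rng.normal(size=(batch, 10))          # classifier logits
labels = np.array([3, 7])                      # true classes
mean_loc = rng.normal(size=(seq_len, batch, loc_dim))            # policy means per glimpse
sampled_loc = mean_loc + 0.1 * rng.normal(size=mean_loc.shape)   # sampled locations
baselines = rng.normal(size=(batch, seq_len))  # learned reward baselines
sigma = 0.1                                    # location policy std-dev

# Reward: 1 if the argmax prediction matches the label, broadcast over all glimpses.
rewards = (logits.argmax(axis=1) == labels).astype(np.float32)   # [batch]
tiled_rewards = np.tile(rewards[:, None], (1, seq_len))          # [batch, seq_len]
advantage = tiled_rewards - baselines          # baseline-subtracted reward

# Log-likelihood of sampled locations under N(mean_loc, sigma), summed over (x, y).
log_pdf = (-0.5 * ((sampled_loc - mean_loc) / sigma) ** 2
           - np.log(sigma) - 0.5 * np.log(2 * np.pi)).sum(axis=2).T   # [batch, seq_len]

# Cross-entropy on the classification logits.
log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
cross_entropy = -log_probs[np.arange(batch), labels].mean()

reward_loss = (log_pdf * advantage).mean()     # REINFORCE term (to be maximized)
baseline_mse = (advantage ** 2).mean()         # baseline regression error
loss = -reward_loss + baseline_mse + cross_entropy
print(loss)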
Source file: RecurrentVisualAttentionMNIST.py