def add_loss_op(self):
    # max_class = tf.transpose(tf.expand_dims(tf.arg_max(self.predicted_class, 2), axis=2), [1, 0, 2])
    # Predicted class from the final logits; shape [batch, 1].
    max_class = tf.expand_dims(tf.argmax(self.logits, axis=1), axis=1)
    true_labels = tf.cast(self.targets_placeholder, tf.int64)
    # Reward is 1.0 for a correct classification, 0.0 otherwise.
    rewards = tf.cast(tf.equal(max_class, true_labels), tf.float32)
    tot_cum_rewards = rewards
    # Broadcast the terminal reward to every glimpse step: [batch, seq_len].
    tiled_rewards = tf.tile(tot_cum_rewards, (1, self.config.seq_len))
    baselines = tf.squeeze(self.baselines, axis=2)
    # Advantage for REINFORCE; stop_gradient keeps the policy term from updating the baseline.
    stable_rewards = tiled_rewards - tf.stop_gradient(baselines)
    # Train the baseline to regress the reward; only the baseline network receives gradients from this term.
    baseline_mse = tf.reduce_mean(tf.square(tiled_rewards - baselines))
    # Supervised cross-entropy on the classification logits.
    self.cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=tf.squeeze(true_labels, axis=1)))
    # Log-likelihood of the sampled glimpse locations under the location policy
    # (Normal takes the standard deviation, although the config field is named variance).
    ll = tf.contrib.distributions.Normal(
        tf.stack(self.mean_loc), self.config.variance).log_pdf(tf.stack(self.sampled_loc))
    ll = tf.transpose(tf.reduce_sum(ll, axis=2))  # sum over (x, y) coords -> [batch, seq_len]
    # REINFORCE term: location log-likelihood weighted by the advantage.
    reward_loss = tf.reduce_mean(ll * stable_rewards, axis=[0, 1])
    # Hybrid loss: maximize the REINFORCE term, minimize baseline MSE and cross-entropy.
    self.loss = -reward_loss + baseline_mse + self.cross_entropy
    self.total_rewards = tf.reduce_mean(tot_cum_rewards)
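
The loss combines three terms: a REINFORCE policy-gradient term that weights the log-likelihood of the sampled glimpse locations by the baseline-subtracted reward, an MSE term that trains the reward baseline, and a standard cross-entropy term on the final classification. Below is a minimal NumPy sketch of the same arithmetic, not the model's actual code: the shapes and values (batch, seq_len, loc_dim, sigma) are hypothetical placeholders, and gradient bookkeeping such as stop_gradient is omitted.

import numpy as np

batch, seq_len, loc_dim = 2, 3, 2              # hypothetical sizes
rng = np.random.default_rng(0)

logits = rng.normal(size=(batch, 10))          # classifier logits
labels = np.array([3, 7])                      # true classes
mean_loc = rng.normal(size=(seq_len, batch, loc_dim))            # policy means per glimpse
sampled_loc = mean_loc + 0.1 * rng.normal(size=mean_loc.shape)   # sampled locations
baselines = rng.normal(size=(batch, seq_len))  # learned reward baselines
sigma = 0.1                                    # location policy std-dev

# Reward: 1 if the argmax prediction matches the label, broadcast over all glimpses.
rewards = (logits.argmax(axis=1) == labels).astype(np.float32)   # [batch]
tiled_rewards = np.tile(rewards[:, None], (1, seq_len))          # [batch, seq_len]
advantage = tiled_rewards - baselines          # baseline-subtracted reward

# Log-likelihood of sampled locations under N(mean_loc, sigma), summed over (x, y).
log_pdf = (-0.5 * ((sampled_loc - mean_loc) / sigma) ** 2
           - np.log(sigma) - 0.5 * np.log(2 * np.pi)).sum(axis=2).T   # [batch, seq_len]

# Cross-entropy on the classification logits.
log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
cross_entropy = -log_probs[np.arange(batch), labels].mean()

reward_loss = (log_pdf * advantage).mean()     # REINFORCE term (to be maximized)
baseline_mse = (advantage ** 2).mean()         # baseline regression error
loss = -reward_loss + baseline_mse + cross_entropy
print(loss)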
Source file: RecurrentVisualAttentionMNIST.py