def build_graph(self, actor, critic, cfg):
    # placeholders for the sampled actions, estimated advantages and discounted returns
    self.ph_action = graph.Placeholder(np.int32, shape=(None,), name="ph_action")
    self.ph_advantage = graph.Placeholder(np.float32, shape=(None,), name="ph_adv")
    self.ph_discounted_reward = graph.Placeholder(np.float32, shape=(None,), name="ph_edr")
    action_one_hot = tf.one_hot(self.ph_action.node, actor.action_size)
    # clip probabilities to avoid NaN in the log
    log_pi = tf.log(tf.maximum(actor.node, 1e-20))
    # policy entropy
    self.entropy = -tf.reduce_sum(actor.node * log_pi)
    # policy loss
    self.policy_loss = -(tf.reduce_sum(tf.reduce_sum(log_pi * action_one_hot, axis=1) * self.ph_advantage.node)
                         + self.entropy * cfg.entropy_beta)
    # value loss
    self.value_loss = tf.reduce_sum(tf.square(self.ph_discounted_reward.node - critic.node))
    # the policy and value losses are summed into a single objective
    # (the critic's contribution is scaled by the critic_scale parameter)
    return self.policy_loss + cfg.critic_scale * self.value_loss
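For reference, the objective assembled above is the standard A3C loss. As a sketch in the code's notation, with $\beta$ standing for cfg.entropy_beta, $c$ for cfg.critic_scale, $A_t$ for ph_advantage, and $R_t$ for ph_discounted_reward:

$$
L = -\Big(\sum_t \log \pi(a_t \mid s_t)\, A_t + \beta\, H(\pi)\Big) + c \sum_t \big(R_t - V(s_t)\big)^2,
\qquad
H(\pi) = -\sum_t \sum_a \pi(a \mid s_t)\, \log \pi(a \mid s_t)
$$

Minimizing $L$ increases the log-probability of actions with positive advantage, while the entropy bonus discourages a prematurely deterministic policy.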