def tf_discount_rewards(self, tf_r):  # tf_r ~ [game_steps, 1]
    """Return the discounted cumulative rewards of ``tf_r`` along axis 0.

    Row ``t`` of the result equals ``tf_r[t] + gamma * result[t + 1]``,
    and the last row equals the last reward, where ``gamma`` is
    ``self._gamma``.

    Args:
        tf_r: Reward tensor; per the original annotation it is shaped
            ``[game_steps, 1]``.

    Returns:
        A tensor shaped like ``tf_r`` holding the discounted sum of the
        current and all following rewards for every step.
    """
    gamma = self._gamma

    def discount_step(running_total, reward):
        # Fold one (time-reversed) step into the running discounted sum.
        return running_total * gamma + reward

    # tf.reverse takes integer axis indices; the boolean per-dimension mask
    # ([True, False]) was only accepted by pre-1.0 TensorFlow releases.
    time_axis = [0]

    # Scan from the last step backwards so that each step accumulates the
    # discounted rewards that follow it, then restore chronological order.
    # Without an initializer, tf.scan seeds the accumulator with the first
    # element, i.e. the final reward of the episode.
    reversed_rewards = tf.reverse(tf_r, axis=time_axis)
    reversed_returns = tf.scan(discount_step, reversed_rewards)
    return tf.reverse(reversed_returns, axis=time_axis)
评论列表
文章目录