def get_mc_target(rewards_t, discount):
    """Compute the discounted return-to-go for every step of a reward sequence.

    For each index ``t`` along axis 0 the result is
    ``G_t = rewards_t[t] + discount * G_{t+1}`` with the value past the last
    step taken as 0, i.e. ``sum_{k >= t} discount**(k - t) * rewards_t[k]``.

    The recursion is evaluated directly rather than dividing a reversed
    cumulative sum of ``rewards_t * discount**t`` by ``discount**t``. That
    division needed an epsilon guard which biased the result as soon as
    ``discount**t`` became comparable to the epsilon, and it produced zeros
    wherever ``discount**t`` underflowed (including ``discount == 0``).

    Args:
        rewards_t: 1-D sequence of per-step rewards; it is cast to float32.
        discount: Scalar discount factor applied once per step.

    Returns:
        A 1-D float32 tensor with one discounted return per entry of
        ``rewards_t``.
    """
    rewards = tf.cast(rewards_t, dtype=tf.float32)
    # Scan from the last step backwards so each return reuses the one after it;
    # the zero initializer stands for the return beyond the final step.
    return tf.scan(
        lambda return_next, reward: reward + discount * return_next,
        rewards,
        initializer=tf.zeros([], dtype=tf.float32),
        reverse=True,
    )
# (web page residue: "Comment list")
# (web page residue: "Table of contents")