def tf_log_probability(self, distr_params, action):
    """
    Builds the log-probability expression for an action.

    The action is first rescaled with `self.min_value` and `self.max_value`,
    then the result is computed as

        (beta - 1) * log(x) + (alpha - 1) * log(1 - x) - log_norm

    where `x` is the rescaled action, clamped to keep both logarithms finite.

    Args:
        distr_params: Sequence of four entries unpacked as
            `(alpha, beta, <unused>, log_norm)`; the third entry is ignored.
        action: Action value(s) on the `[min_value, max_value]` scale
            (presumably a tensor -- confirm against callers).

    Returns:
        The log-probability expression described above.
    """
    alpha, beta, _, log_norm = distr_params

    # Map the action onto the unit scale via the configured value bounds.
    shifted = action - self.min_value
    value_range = self.max_value - self.min_value
    unit_action = shifted / value_range

    # Cap below one so that log1p(-x), i.e. log(1 - x), never sees x == 1.
    upper_bound = 1.0 - util.epsilon
    unit_action = tf.minimum(x=unit_action, y=upper_bound)

    # The lower floor is applied only inside the logarithm; the log1p term
    # below still uses the value that was capped from above only.
    beta_exponent = beta - 1.0
    floored_action = tf.maximum(x=unit_action, y=util.epsilon)
    log_term = beta_exponent * tf.log(x=floored_action)

    alpha_exponent = alpha - 1.0
    log_complement_term = alpha_exponent * tf.log1p(x=-unit_action)

    return log_term + log_complement_term - log_norm
评论列表
文章目录