def tf_sample(self, distr_params, deterministic):
    """Build the action tensor, rescaled into ``[min_value, max_value]`` terms.

    Args:
        distr_params: Four-element sequence ``(alpha, beta, alpha_beta, _)``;
            the last element is not used here.
            NOTE(review): ``alpha_beta`` is presumably ``alpha + beta`` --
            confirm against the code that produces the parameters.
        deterministic: Condition handed to ``tf.where``; where it holds the
            deterministic value is taken, otherwise the random sample.

    Returns:
        ``min_value + (max_value - min_value) * fraction`` where ``fraction``
        is picked by ``tf.where`` between the deterministic value and the
        sampled one.
    """
    alpha, beta, alpha_beta, _ = distr_params

    # Deterministic branch: the value used as the "mean" action.
    mean_fraction = beta / alpha_beta

    # Stochastic branch: draw one gamma variate per concentration parameter
    # and combine them as beta_draw / (alpha_draw + beta_draw).
    alpha_draw = tf.random_gamma(shape=(), alpha=alpha)
    beta_draw = tf.random_gamma(shape=(), alpha=beta)
    draw_total = alpha_draw + beta_draw
    # The denominator is floored at util.epsilon before dividing.
    safe_total = tf.maximum(x=draw_total, y=util.epsilon)
    random_fraction = beta_draw / safe_total

    # Rescale the chosen fraction to the configured value range.
    lower = self.min_value
    value_range = self.max_value - self.min_value
    fraction = tf.where(condition=deterministic, x=mean_fraction, y=random_fraction)
    return lower + value_range * fraction
评论列表
文章目录