def tf_parameterize(self, x):
    """
    Build the distribution parameters derived from the input `x`.

    Args:
        x: Input forwarded to `self.logits.apply`.

    Returns:
        A tuple `(logits, probabilities, state_value)` where
        - `logits` is the element-wise log of the clipped probabilities,
        - `probabilities` is the softmax over the last axis, floored at
          `util.epsilon` (so, after clipping, rows need not sum to exactly 1),
        - `state_value` is the log-sum-exp of the raw reshaped logits over
          the last axis.
    """
    # Flat output of the logits layer
    flat_output = self.logits.apply(x=x)

    # View it as (inferred leading dim,) + self.shape + (self.num_actions,)
    raw_logits = tf.reshape(
        tensor=flat_output,
        shape=(-1,) + self.shape + (self.num_actions,)
    )

    # Log-sum-exp over the action axis, taken from the raw (unclipped) logits
    state_value = tf.reduce_logsumexp(input_tensor=raw_logits, axis=-1)

    # Softmax over the action axis, floored at epsilon for numerical stability
    # (keeps the subsequent log away from log(0))
    clipped_probs = tf.maximum(
        x=tf.nn.softmax(logits=raw_logits, dim=-1),
        y=util.epsilon
    )

    # "Normalized" logits: log of the clipped probabilities
    return tf.log(x=clipped_probs), clipped_probs, state_value
评论列表
文章目录