def sample_action(self, observation):
    r"""Sample one action index from the policy \pi_\theta(a|s).

    The logits are fetched from the network with a single ``sess.run`` call
    and the sampling itself is done with NumPy. TF ops are avoided on purpose
    here since this is a hot code path and we are optimizing for CPU usage
    (or maybe ``tf.multinomial`` is just slow in general). The TF-only
    version would be::

        sample_action_op = tf.squeeze(tf.nn.softmax(self.net.logits))
        action = tf.multinomial(sample_action_op)

    Args:
        observation: A single observation. It is wrapped in a one-element
            list before being fed to ``self.net.obs``.

    Returns:
        A one-element list holding the sampled action index.
    """
    # TODO: ensure this works when num_actions > 1
    # Feed a batch of one and keep the first (only) row of the fetched logits.
    logits = self.net.sess.run(
        self.net.logits,
        {self.net.obs: [observation]},
    )[0]
    # The output of self.softmax is used directly as the sampling probabilities.
    probs = self.softmax(logits)
    # An int first argument makes choice sample as if from np.arange(n).
    action = np.random.choice(len(logits), p=probs)
    return [action]
# Comment list
# Table of contents