def select_action(self, t, greedy_action_func, action_value=None):
    """Sample an action index from a softmax over the given Q-values.

    The Q-values are divided by ``self.T`` (presumably the softmax
    temperature -- confirm against the class definition) before the
    softmax is applied, and the resulting probabilities are used as the
    sampling weights.

    Args:
        t: Not read by this method.
        greedy_action_func: Not read by this method.
        action_value: A ``chainerrl.action_value.DiscreteActionValue``.
            Required despite the ``None`` default; both conditions are
            enforced with ``assert``.

    Returns:
        One element of ``np.arange(n_actions)``, drawn with
        ``np.random.choice`` using the softmax probabilities, where
        ``n_actions`` is the size of the second axis of ``q_values``.
    """
    assert action_value is not None
    assert isinstance(
        action_value, chainerrl.action_value.DiscreteActionValue)

    q_values = action_value.q_values
    candidates = np.arange(q_values.shape[1])

    # Sampling needs no gradients, so the softmax is evaluated with
    # backprop disabled and its raw array is moved to host memory.
    with chainer.no_backprop_mode():
        softmax_out = F.softmax(q_values / self.T)
        # NOTE: ravel() flattens every row, so the weight vector has
        # exactly ``n_actions`` entries only when q_values has one row.
        probs = chainer.cuda.to_cpu(softmax_out.data).ravel()

    return np.random.choice(candidates, p=probs)
评论列表
文章目录