def _act(self, state):
    """Sample an action for a single state with the model in non-train mode.

    The state is wrapped in a one-element list and turned into a batch by
    ``batch_states`` (using ``self.xp`` and ``self.phi``). ``self.model`` is
    then called on that batch with Chainer's ``train`` config set to
    ``False`` and backprop disabled, and an action is drawn from the
    returned distribution via ``sample()``.

    Args:
        state: A single state; passed to ``batch_states`` as ``[state]``.

    Returns:
        A 2-tuple ``(action, v)``: the first entry of the sampled action
        batch and the first entry of the model's second output, both
        copied to CPU with ``cuda.to_cpu``.
    """
    # NOTE(review): the nesting below is reconstructed from a paste that
    # lost its indentation -- confirm against the upstream file.
    backend = self.xp
    with chainer.using_config('train', False):
        # A batch of exactly one element, hence the [0] indexing below.
        batched = batch_states([state], backend, self.phi)
        with chainer.no_backprop_mode():
            policy, value = self.model(batched)
            sampled = policy.sample()
        # `value` is presumably the state-value estimate -- TODO confirm.
        action_out = cuda.to_cpu(sampled.data)[0]
        value_out = cuda.to_cpu(value.data)[0]
        return action_out, value_out
评论列表
文章目录