def act(self, obs):
    """Choose an action for a single observation.

    The observation is wrapped in a one-element list, passed through
    ``self.batch_states`` and then ``self.model``; the first entry of the
    resulting action array is returned after being moved to the CPU.

    Args:
        obs: One observation, in whatever form ``self.phi`` and
            ``self.batch_states`` accept (not visible from this method).

    Returns:
        Element 0 of ``action_distrib.most_probable`` when
        ``self.act_deterministically`` is truthy, otherwise element 0 of
        a fresh ``action_distrib.sample()``.
    """
    # Use the process-local model for acting. Acting needs no gradients,
    # so the forward pass runs inside no_backprop_mode.
    with chainer.no_backprop_mode():
        statevar = self.batch_states([obs], self.xp, self.phi)
        # The model's second output is not used when acting.
        # NOTE(review): presumably a value estimate -- confirm against the
        # model definition.
        action_distrib, _ = self.model(statevar)
        if self.act_deterministically:
            action = action_distrib.most_probable
        else:
            action = action_distrib.sample()
        # Index 0 undoes the one-element batch built from [obs] above.
        return chainer.cuda.to_cpu(action.data)[0]
评论列表
文章目录