def act(self, obs):
    """Pick an action for a single observation.

    The observation is wrapped in a one-element list and turned into a
    batch by ``self.batch_states`` (using ``self.xp`` and ``self.phi``),
    then fed to ``self.model``. The whole computation runs inside
    ``chainer.no_backprop_mode()``.

    Args:
        obs: A single observation, passed to ``self.batch_states``.

    Returns:
        The first element of the CPU copy of the chosen action's data:
        the distribution's ``most_probable`` value when
        ``self.act_deterministically`` is truthy, otherwise a fresh
        ``sample()`` from it.
    """
    with chainer.no_backprop_mode():
        model_input = self.batch_states([obs], self.xp, self.phi)
        distribution = self.model(model_input)
        # Evaluate only the branch that is needed so that no sampling
        # happens when acting deterministically.
        if self.act_deterministically:
            chosen = distribution.most_probable
        else:
            chosen = distribution.sample()
        # The batch holds exactly one observation, so take index 0.
        return chainer.cuda.to_cpu(chosen.data)[0]
评论列表
文章目录