def act(self, obs):
    """Pick an action for a single observation.

    The observation is wrapped in a one-element list and handed to
    ``self.batch_states`` together with ``np`` and ``self.phi``; the
    result is fed to ``self.model.pi_and_v``, whose first return value is
    used as the policy output and whose second is ignored.

    Args:
        obs: One observation. Its expected type is whatever
            ``self.batch_states`` / ``self.phi`` accept (not visible
            here -- confirm against the enclosing class).

    Returns:
        Element 0 of ``.data`` taken from ``most_probable`` of the policy
        output when ``self.act_deterministically`` is truthy, otherwise
        from a fresh ``sample()`` of it.
    """
    # Acting uses the process-local model; everything below runs inside
    # chainer's no_backprop_mode context.
    with chainer.no_backprop_mode():
        batch = self.batch_states([obs], np, self.phi)
        policy_out, _value = self.model.pi_and_v(batch)
        chosen = (
            policy_out.most_probable
            if self.act_deterministically
            else policy_out.sample()
        )
        # The batch holds exactly one observation, so take the first entry.
        return chosen.data[0]
评论列表
文章目录