import chainer
from chainer import cuda  # cuda.to_cpu moves arrays from GPU to host memory


def act(self, state):
    # Evaluation-mode forward pass: with 'train' set to False, links such
    # as dropout and BatchNormalization behave deterministically.
    with chainer.using_config('train', False):
        s = self.batch_states([state], self.xp, self.phi)
        if self.act_deterministically:
            action = self.policy(s).most_probable
        else:
            action = self.policy(s).sample()
        # Q is not needed here, but log it just for information
        q = self.q_function(s, action)

    # Update stats: exponential moving average of the Q-value seen at act time
    self.average_q *= self.average_q_decay
    self.average_q += (1 - self.average_q_decay) * float(q.data)

    self.logger.debug('t:%s a:%s q:%s',
                      self.t, action.data[0], q.data)
    return cuda.to_cpu(action.data[0])
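
The two `average_q` lines maintain an exponential moving average of the Q-values observed at action time, used only for logging. A standalone sketch of that update is below; the decay of 0.999 and the Q-values are illustrative assumptions (0.999 is the default `average_q_decay` in several chainerrl agents, but treat it as an assumption here):

# Standalone sketch of the EMA update used for average_q in act().
# decay and the Q-values are made up for illustration.
decay = 0.999
average_q = 0.0
for q in [1.2, 0.8, 1.5]:  # hypothetical Q-values from successive act() calls
    average_q *= decay               # forget a little of the old average
    average_q += (1 - decay) * q     # blend in the new observation
print(average_q)  # a slowly-moving summary of recent Q estimates

With a decay this close to 1, the average reacts slowly, which makes it a stable diagnostic of whether the critic's value estimates are drifting during training.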