def setUp(self):
def evaluator(actions):
# negative square norm of actions
return -F.sum(actions ** 2, axis=1)
self.evaluator = evaluator
if self.has_maximizer:
def maximizer():
return chainer.Variable(np.zeros(
(self.batch_size, self.action_size), dtype=np.float32))
else:
maximizer = None
self.maximizer = maximizer
self.av = action_value.SingleActionValue(
evaluator=evaluator, maximizer=maximizer)
评论列表
文章目录