def _initialize(self):
    """Choose the parameters the policy starts from.

    If ``self.policy.initialized`` is truthy, the policy's current
    ``parameters`` are returned unchanged.

    Otherwise candidates are drawn from ``self.parameter_space.sample()``.
    Each candidate is assigned to ``self.policy.parameters`` and
    ``self.estimator(self.policy)`` is evaluated; the first candidate
    whose gradient norm is at least ``1000 * self.eps`` is returned.

    At most 1000 candidates are tested.  If none qualifies, an error is
    logged and ``None`` is returned; the policy then still holds the
    last candidate that was tested.
    """
    logger.debug("Initializing Policy.")

    # Respect parameters that were already set on the policy.
    if self.policy.initialized:
        logger.debug("Use pre-set policy parameters.")
        return self.policy.parameters

    # Otherwise probe random points of the parameter space until the
    # estimated gradient is clearly away from zero.
    candidate = self.parameter_space.sample()
    attempts_left = 1000
    while attempts_left > 0:
        attempts_left -= 1

        # The estimator receives the policy itself, so the candidate has
        # to be installed on it before the gradient is evaluated.
        self.policy.parameters = candidate
        gradient_norm = norm(self.estimator(self.policy))
        if gradient_norm >= 1000 * self.eps:
            return candidate

        # A fresh sample is drawn after every rejected candidate,
        # including the last one (that final draw is never tested).
        candidate = self.parameter_space.sample()

    logger.error('Unable to find non-zero gradient.')
    return None
评论列表
文章目录