def select_action(self, t, greedy_action_func, action_value=None):
    """Return the greedy action with the current noise state added to it.

    On the first call (``self.ou_state is None``) the noise state is
    created with the same shape as the greedy action: either filled with
    ``self.mu`` when ``self.start_with_mu`` is true, or sampled from a
    normal distribution centred on ``self.mu``. On every later call the
    existing state is advanced with ``self.evolve()`` instead.

    Args:
        t: Step counter; only used in the debug log message.
        greedy_action_func: Zero-argument callable returning the greedy
            action. Arrays, scalars and plain sequences are accepted.
        action_value: Unused by this method.

    Returns:
        The greedy action plus the current noise state.
    """
    a = greedy_action_func()
    if self.ou_state is not None:
        # The state already exists: advance it by one step.
        self.evolve()
    elif self.start_with_mu:
        # np.shape() equals a.shape for arrays and also handles scalars
        # and sequences, which have no ``shape`` attribute.
        self.ou_state = np.full(np.shape(a), self.mu, dtype=np.float32)
    else:
        # NOTE(review): presumably the stationary standard deviation of
        # the process advanced by evolve() -- confirm against evolve().
        # It is only finite for 0 < theta < 2.
        sigma_stable = self.sigma / np.sqrt(
            2 * self.theta - self.theta ** 2)
        self.ou_state = np.random.normal(
            loc=self.mu,
            scale=sigma_stable,
            size=np.shape(a),
        ).astype(np.float32)
    noise = self.ou_state
    self.logger.debug('t:%s noise:%s', t, noise)
    return a + noise
# Residue from the source web page: "Comment list" / "Table of contents".