def _set_interp_values(self):
    """
    Use iteration-based interpolation to set values of some
    schedule-based parameters.

    Reads ``ent_reg_schedule`` and ``lg_step_schedule`` from
    ``self._hyperparams``. A scalar schedule is used unchanged; a
    sequence schedule is interpolated linearly in log space over its
    evenly spaced entries, evaluated at the current interpolation
    value ``t``. The entropy value is passed to
    ``self.policy_opt.set_ent_reg`` and the step value is stored in
    ``self._hyperparams['lg_step']``.
    """
    hyperparams = self._hyperparams

    # Compute temporal interpolation value, capped at 1.
    span = hyperparams['iterations'] - 1
    if span == 0:
        # Single-iteration run: the ratio would divide by zero, so use
        # the clamped limit (end of the schedule) directly.
        t = 1.0
    else:
        t = min((self.iteration_count + 1.0) / span, 1)

    def scheduled_value(schedule):
        """Return the value of `schedule` at interpolation value `t`."""
        # isinstance (rather than an exact type match) also accepts
        # NumPy scalars, which have no len() and cannot be interpolated.
        if isinstance(schedule, (int, float, np.number)):
            return schedule
        knots = np.linspace(0, 1, num=len(schedule))
        # Interpolate in log space so the value changes geometrically
        # between schedule entries.
        return np.exp(np.interp(t, knots, np.log(schedule)))

    # Perform iteration-based interpolation of entropy penalty.
    self.policy_opt.set_ent_reg(
        scheduled_value(hyperparams['ent_reg_schedule'])
    )
    # Perform iteration-based interpolation of Lagrange multiplier.
    hyperparams['lg_step'] = scheduled_value(hyperparams['lg_step_schedule'])
评论列表
文章目录