def get_bonus(self, path):
    """Return the bonus values computed for ``path``.

    While ``self._fit_steps`` has not yet exceeded
    ``self._yield_zeros_until``, the result is an all-zero array with one
    entry per element of ``path["rewards"]``.

    Afterwards the result is ``self._coeff`` times the flattened output of
    ``self._f_predict(path['observations'])``. If ``self._filter_bonuses``
    is truthy, that result is additionally multiplied by the logical
    negation of ``self._wrapped_constraint.evaluate(path)``, so entries
    where the evaluation is nonzero become zero.

    Args:
        path: Mapping that provides ``'observations'`` (passed to the
            predictor) and ``"rewards"`` (only its ``.size`` is read).
            It is also handed unchanged to the wrapped constraint.

    Returns:
        A NumPy array of bonuses (presumably one per step of the path;
        confirm against the predictor and the constraint's output shape).
    """
    # Warm-up guard: no bonus until enough fit steps have happened.
    if not (self._fit_steps > self._yield_zeros_until):
        return np.zeros(path["rewards"].size)

    bonus = self._coeff * self._f_predict(path['observations']).reshape(-1)

    if self._filter_bonuses:
        # np.logical_not rather than np.invert: invert is a *bitwise* NOT,
        # which only behaves as a mask negation for boolean arrays. For a
        # 0/1 integer indicator it produces -1/-2 (corrupting the bonus)
        # and for floats it raises TypeError. logical_not gives the same
        # result for boolean masks and the intended one for numeric masks.
        # NOTE(review): assumes a nonzero evaluation marks an entry whose
        # bonus should be dropped — confirm against the constraint class.
        keep_mask = np.logical_not(self._wrapped_constraint.evaluate(path))
        bonus = bonus * keep_mask

    return bonus
评论列表
文章目录