def __init__(
    self,
    normalization_parameters,
    parameters,
):
    """Set up action-budget quantile state, then initialize the parent.

    Args:
        normalization_parameters: Forwarded unchanged to the parent
            initializer.
        parameters: Configuration object. This method reads
            ``parameters.action_budget`` (``window_size``, ``action_limit``,
            ``limited_action``, ``quantile_update_rate``,
            ``quantile_update_frequency``), ``parameters.actions``,
            ``parameters.rl.gamma`` and ``parameters.rl.maxq_learning``;
            the whole object is also forwarded to the parent initializer.
    """
    budget = parameters.action_budget

    # Bounded buffer: once ``window_size`` entries are held, appending a new
    # one discards the oldest.
    self._quantile_states = collections.deque(maxlen=budget.window_size)
    # NOTE(review): presumably ``action_limit`` is a percentage in [0, 100],
    # making this the complementary percentile -- confirm against the config.
    self._quantile = 100 - budget.action_limit
    self.quantile_value = 0

    # Index of the first entry of ``parameters.actions`` equal to the limited
    # action. NOTE(review): ``np.argmax`` over an all-False mask returns 0, so
    # a limited action missing from ``parameters.actions`` silently maps to
    # index 0 -- verify the configuration is validated upstream.
    self._limited_action = np.argmax(
        np.array(parameters.actions) == budget.limited_action
    )

    self._discount_factor = parameters.rl.gamma
    self._quantile_update_rate = budget.quantile_update_rate
    self._quantile_update_frequency = budget.quantile_update_frequency
    self._update_counter = 0

    # Zero-argument super() binds to the class this method is defined in.
    # The previous ``super(self.__class__, self)`` form recursed forever as
    # soon as this class was subclassed, because ``self.__class__`` is then
    # the subclass and the lookup lands back on this very method.
    super().__init__(normalization_parameters, parameters)

    # Kept after the parent initializer, matching the original ordering.
    # NOTE(review): presumably this overrides a value set by the parent --
    # confirm.
    self._max_q = parameters.rl.maxq_learning
limited_discrete_action_trainer.py 文件源码
python
阅读 33
收藏 0
点赞 0
评论 0
评论列表
文章目录