def __init__(self, env_spec, **kwargs):
    """Set up the bookkeeping state for the high/low episodic memory.

    Delegates to the parent initializer with ``env_spec`` and then creates
    the attributes below. ``**kwargs`` is accepted but not read here.
    """
    super(HighLowMemory, self).__init__(env_spec)

    # Keep a handle on ``self.exp`` (presumably set by the parent
    # initializer); it is reused as the buffer and must be cleared.
    self.last_exp = self.exp

    # Separate episode stores for the high and the low half.
    self.epi_memory_high, self.epi_memory_low = [], []

    # Reward bounds start at the opposite infinities.
    self.max_reward, self.min_reward = -math.inf, math.inf

    # Threshold starts at +inf; per the original note, the first episodes
    # go into the bad half until the threshold is recomputed.
    # NOTE(review): the original note said "every 5 epis", but
    # recompute_freq below is 10 -- confirm which one is intended.
    self.threshold = math.inf
    self.threshold_history = []
    self.epi_num = 0

    # Sampling and capacity settings.
    self.prob_high = 0.66
    self.num_epis_to_sample = 3
    self.max_epis_in_mem = 15
    self.recompute_freq = 10

    log_self(self)
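# Comment list (web page navigation text, not part of the code)
# Table of contents (web page navigation text, not part of the code)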