def add_new_weak_learner(self):
    '''
    Summary:
        Fits one additional weak learner, h, and appends it to self.weak_learners.
        h is a single additive regression tree trained to approximate Eq. 1:
            [Eq. 1] h = argmin_h (sum_i Q_A(s_i,a_i) + h(s_i, a_i) - (r_i + max_b Q_A(s'_i, b)))
        i.e. each tree regresses onto the TD residual of the current ensemble.
    '''
    # An empty episode gives us nothing to regress on.
    if len(self.most_recent_episode) == 0:
        return

    # Preallocate the design matrix and regression targets, one row per experience.
    num_samples = len(self.most_recent_episode)
    feature_matrix = np.zeros((num_samples, self.max_state_features + 1))
    targets = np.zeros(num_samples)

    for row, (s, a, r, s_prime) in enumerate(self.most_recent_episode):
        # Zero-pad short state feature vectors (e.g. some Atari states).
        feature_matrix[row] = self._pad_features_with_zeros(s, a)
        # TD residual: bootstrapped target minus the ensemble's current estimate.
        targets[row] = r + self.gamma * self.get_max_q_value(s_prime) - self.get_q_value(s, a)

    # Fit a depth-limited single-tree regressor on the residuals and keep it.
    # NOTE(review): loss='ls' was renamed to 'squared_error' in scikit-learn 1.0;
    # confirm the pinned sklearn version still accepts 'ls'.
    regressor = GradientBoostingRegressor(loss='ls', n_estimators=1, max_depth=self.max_depth)
    regressor.fit(feature_matrix, targets)
    self.weak_learners.append(regressor)
# Source: GradientBoostingAgentClass.py (scraped blog-page metadata removed)