FittedFQI.py 文件源码-python代码片段

FittedFQI.py 文件源码

python

阅读 34 收藏 0 点赞 0 评论 0

def learn(self, experiences, max_iter=20):
        """Run batch fitted Q-iteration and install the resulting regressor.

        Each pass builds one-step targets ``r + discount * max_a' Q(ns, a')``
        from the representation's current Q estimates, fits a fresh
        2-nearest-neighbour regressor from the state-action features to those
        targets, and stores it as ``self.representation.models["root"]``.
        Presumably ``Qs`` consults that stored model, so every pass
        bootstraps from the previous fit -- confirm against the
        representation class.

        Args:
            experiences: 2-D array with one transition per row, laid out as
                ``(s, a, r, ns)``: the first ``self.domain.state_space_dims``
                columns hold the state, the next column the action, the next
                the reward, and all remaining columns the next state.
            max_iter: Number of fit passes to run.

        Returns:
            None. The only effect is replacing
            ``self.representation.models["root"]`` on each pass.
        """
        # Column layout of each row: (s, a, r, ns).
        n_state_dims = self.domain.state_space_dims
        states = experiences[:, 0:n_state_dims]
        actions = experiences[:, n_state_dims]
        # Force rewards into an (N, 1) column. A plain 1-D (N,) slice added to
        # the (N, 1) column of best next-state values would broadcast to an
        # (N, N) matrix instead of one target per transition.
        rewards = np.reshape(experiences[:, n_state_dims + 1], (-1, 1))
        next_states = experiences[:, n_state_dims + 2:]

        # The features depend only on (s, a), so they are computed once.
        X = self.representation.phi_sa("root", states, actions)
        discount = self.domain.discount_factor

        for _ in range(max_iter):
            # Greedy value of each next state under the current Q estimate.
            nqs = self.representation.Qs("root", next_states)
            best_nqs = np.reshape(np.amax(nqs, axis=1), (-1, 1))
            y = rewards + discount * best_nqs

            model = KNeighborsRegressor(n_neighbors=2, n_jobs=-1)
            model.fit(X, y)
            self.representation.models["root"] = model