def forward_one_step(self, state, action, reward, next_state, test=False, episode_ends=None):
    """Compute the clipped-TD Q-learning loss for one minibatch.

    Args:
        state: batched observation array, shape (n_batch, ...).
        action: per-sample actions; mapped to column indices via
            self.get_index_with_action.
        reward: per-sample rewards; only their sign is used (reward clipping).
        test: forwarded to the Q-network calls (old-Chainer test-mode flag).
        episode_ends: optional per-sample terminal flags. When truthy for
            sample i, the bootstrap term is dropped and the target is just
            sign(reward[i]). Defaults to None, meaning no sample is terminal.
            NOTE(review): the original referenced an undefined global
            `episode_ends`; it is now an explicit keyword parameter so
            existing positional callers keep working.

    Returns:
        (loss, q): the mean-squared TD error Variable and the Q Variable.
    """
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    state = Variable(state)
    next_state = Variable(next_state)
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()
    q = self.compute_q_variable(state, test=test)
    # Bootstrap value: max over actions of the (frozen) target network.
    max_target_q = self.compute_target_q_variable(next_state, test=test)
    max_target_q = xp.amax(max_target_q.data, axis=1)
    # Start from the current Q values so non-taken actions get zero gradient.
    target = q.data.copy()
    for i in xrange(n_batch):
        # BUGFIX: was `episode_ends[i] is True`, which is always False for
        # numpy bool_ values (they are not the `True` singleton) — terminal
        # states were never detected. A plain truthiness test is correct.
        if episode_ends is not None and episode_ends[i]:
            target_value = np.sign(reward[i])
        else:
            target_value = np.sign(reward[i]) + config.rl_discount_factor * max_target_q[i]
        action_index = self.get_index_with_action(action[i])
        old_value = target[i, action_index]
        # Clip the TD error to [-1, 1] (DQN error clipping) by moving the
        # target at most 1.0 away from the current estimate.
        diff = target_value - old_value
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value
    target = Variable(target)
    loss = F.mean_squared_error(target, q)
    return loss, q
# NOTE(review): removed stray web-page text ("评论列表" / "文章目录" —
# "comment list" / "table of contents"), a scraping artifact that made
# the file a syntax error; it was never part of the code.