MADDPG.py 文件源码-python代码片段

MADDPG.py 文件源码

python

阅读 36 收藏 0 点赞 0 评论 0

项目：pytorch-maddpg 作者: xuehy 项目源码文件源码

def select_action(self, state_batch):
        """Choose one noisy, clamped action per agent for the given states.

        For every agent ``i`` the matching actor network is evaluated on that
        agent's row of ``state_batch``, Gaussian noise scaled by
        ``self.var[i]`` is added for exploration, and the result is clamped
        to ``[-1, 1]``.

        Side effects:
            * ``self.var[i]`` is multiplied by 0.999998 on each call while
              ``self.episode_done > self.episodes_before_train`` and the
              value is still above 0.05.
            * ``self.steps_done`` is incremented by one per call.

        Args:
            state_batch: tensor indexed as ``state_batch[i, :]`` per agent
                (n_agents x state_dim).

        Returns:
            A tensor of shape ``(self.n_agents, self.n_actions)`` holding the
            selected actions. It is allocated with a plain ``th.zeros`` call,
            so it lives on the default tensor type/device regardless of
            ``self.use_cuda``.
        """
        # state_batch: n_agents x state_dim
        actions = Variable(th.zeros(
            self.n_agents,
            self.n_actions))
        FloatTensor = th.cuda.FloatTensor if self.use_cuda else th.FloatTensor
        for i in range(self.n_agents):
            # Detach so action selection does not extend the caller's graph.
            sb = state_batch[i, :].detach()
            # squeeze(0) removes only the batch axis added by unsqueeze(0);
            # a bare squeeze() would also collapse a 1-dimensional action.
            act = self.actors[i](sb.unsqueeze(0)).squeeze(0)

            # Noise must match the action dimension (previously hard-coded
            # to 2, which broke any agent with n_actions != 2).
            noise = np.random.randn(self.n_actions) * self.var[i]
            act = act + Variable(th.from_numpy(noise).type(FloatTensor))

            # Anneal the exploration scale once training has started,
            # stopping when it reaches the 0.05 floor.
            if self.episode_done > self.episodes_before_train and\
               self.var[i] > 0.05:
                self.var[i] *= 0.999998
            act = th.clamp(act, -1.0, 1.0)

            actions[i, :] = act
        self.steps_done += 1

        return actions