rl.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:anirban-imitation 作者: Santara 项目源码 文件源码
def _make_actiondist_ops(self, obsfeat_B_Df):
        # Computes action distribution mean (of a Gaussian) using MLP
        with nn.variable_scope('hidden'):
            net = nn.FeedforwardNet(obsfeat_B_Df, (self.obsfeat_space.dim,), self.cfg.hidden_spec)
        with nn.variable_scope('out'):
            mean_layer = nn.AffineLayer(net.output, net.output_shape, (self.action_space.dim,), initializer=np.zeros((net.output_shape[0], self.action_space.dim)))
            assert mean_layer.output_shape == (self.action_space.dim,)
        means_B_Da = mean_layer.output

        # Action distribution log standard deviations are parameters themselves
        logstdevs_1_Da = nn.get_variable('logstdevs_1_Da', np.full((1, self.action_space.dim), self.cfg.init_logstdev), broadcastable=(True,False))
        stdevs_1_Da = self.cfg.min_stdev + tensor.exp(logstdevs_1_Da) # minimum stdev seems to make density / kl computations more stable
        stdevs_B_Da = tensor.ones_like(means_B_Da)*stdevs_1_Da # "broadcast" to (B,Da)

        actiondist_B_Pa = tensor.concatenate([means_B_Da, stdevs_B_Da], axis=1)
        return actiondist_B_Pa
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号