agent_base.py source code

python

Project: trpo · Author: jjkke88
def __init__(self, env):
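        # Assumes TensorFlow 1.x is imported as tf, and that Baseline, Storage,
        # DiagonalGaussian and pms (the project's parameter settings) come from
        # other modules of the trpo project; those imports are not shown in this snippet.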
        self.env = env
        # if not isinstance(env.observation_space, Box) or \
        #    not isinstance(env.action_space, Discrete):
        #     print("Incompatible spaces.")
        #     exit(-1)
        print("Observation Space", env.observation_space)
        print("Action Space", env.action_space)
        print("Action area, high:%f, low%f" % (env.action_space.high, env.action_space.low))
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1 / 3.0)
        self.session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.end_count = 0
        self.paths = []
        self.train = True
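        # Baseline for advantage estimation, rollout storage, and a diagonal
        # Gaussian policy distribution over pms.action_shape action dimensions;
        # self.net (the policy network) is filled in elsewhere.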
        self.baseline = Baseline()
        self.storage = Storage(self, self.env, self.baseline)
        self.distribution = DiagonalGaussian(pms.action_shape)
        self.net = None

    # def init_logger(self):
    #     head = ["average_episode_std" , "sum steps episode number" "total number of episodes" ,
    #             "Average sum of rewards per episode" ,
    #             "KL between old and new distribution" , "Surrogate loss" , "Surrogate loss prev" , "ds" , "entropy" ,
    #             "mean_advant"]
    #     self.logger = Logger(head)
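
The constructor only needs an environment with a continuous (Box) action space. Below is a minimal usage sketch; "AgentBase" is a placeholder for whichever class in agent_base.py owns the __init__ above (the snippet does not show the class name), and Pendulum-v0 is just an example continuous-control environment from classic gym.

import gym

# Hypothetical instantiation; "AgentBase" stands in for the real class name.
env = gym.make("Pendulum-v0")
agent = AgentBase(env)       # prints the spaces and action bounds, opens a TF session
print(agent.train)           # True: the agent starts in training mode
print(agent.distribution)    # DiagonalGaussian over pms.action_shape action dimensions

The small per_process_gpu_memory_fraction value keeps each such agent to a sliver of GPU memory, so several rollout workers can each open their own TensorFlow session on the same card.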