def __init__(self, env):
    """Set up the agent around ``env``.

    Prints the environment's observation/action spaces, opens a TensorFlow
    session with a capped per-process GPU memory fraction, and creates the
    baseline, rollout storage and action distribution helpers. The network
    (``self.net``) is left as ``None`` here.

    Args:
        env: Environment exposing ``observation_space`` and an
            ``action_space`` with ``high`` / ``low`` bounds.
    """
    self.env = env
    # Space-compatibility check, currently disabled:
    # if not isinstance(env.observation_space, Box) or \
    # not isinstance(env.action_space, Discrete):
    # print("Incompatible spaces.")
    # exit(-1)
    print("Observation Space", env.observation_space)
    print("Action Space", env.action_space)
    # The bounds may be arrays (one entry per action dimension); "%f" only
    # accepts a single scalar and fails on multi-element arrays, so format
    # them with "%s" instead.
    print("Action area, high:%s, low:%s" % (env.action_space.high, env.action_space.low))
    # Cap this process's share of GPU memory (0.1 / 3.0 of the device).
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1 / 3.0)
    self.session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    self.end_count = 0
    self.paths = []
    self.train = True
    self.baseline = Baseline()
    self.storage = Storage(self, self.env, self.baseline)
    self.distribution = DiagonalGaussian(pms.action_shape)
    self.net = None
# def init_logger(self):
# head = ["average_episode_std" , "sum steps episode number" , "total number of episodes" ,
# "Average sum of rewards per episode" ,
# "KL between old and new distribution" , "Surrogate loss" , "Surrogate loss prev" , "ds" , "entropy" ,
# "mean_advant"]
# self.logger = Logger(head)
评论列表
文章目录