def __init__(self, env, monitor_path, video=True, **usercfg):
    """Set up the monitored environment, the default configuration and the network.

    Args:
        env: Environment to learn on. It is wrapped in ``wrappers.Monitor``;
            its ``spec.tags`` mapping is read for the episode step limit and
            the wrapped environment's ``action_space.n`` is stored as
            ``self.nA``.
        monitor_path: Location handed to ``wrappers.Monitor`` for its output.
        video: When truthy, ``video_callable=None`` is passed to the monitor
            (presumably selecting the monitor's own recording schedule --
            TODO confirm against the gym version in use); otherwise
            ``False`` is passed.
        **usercfg: Configuration overrides. They are forwarded to the parent
            initializer and applied again on top of the defaults below, so
            user-supplied values take precedence.
    """
    super(Karpathy, self).__init__(**usercfg)

    video_callable = None if video else False
    self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=video_callable)
    self.nA = self.env.action_space.n

    # Default configuration; any key may be overridden through keyword arguments.
    # The former timesteps_per_batch (10000) and n_iter (100) defaults are
    # currently disabled.
    defaults = {
        # Step limit taken from the environment spec tags (None if the tag is absent).
        "episode_max_length": env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps"),
        "gamma": 0.99,
        "learning_rate": 0.05,
        # Number of episodes after which the gradients are adapted.
        "batch_size": 10,
        # Used for RMSProp.
        "decay_rate": 0.99,
        "n_hidden_units": 20,
        # Draw a plot every 50 episodes.
        "draw_frequency": 50,
        "repeat_n_actions": 1,
    }
    self.config.update(defaults)

    # User-provided settings win over the defaults above.
    self.config.update(usercfg)
    self.build_network()
评论列表
文章目录