def test_write_upon_reset_true():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=True)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
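These tests rely on a `helpers.tempdir()` context manager that is not shown in this excerpt. A minimal sketch of what it presumably does, inferred from how the tests use it (the real helper may differ):

import contextlib
import shutil
import tempfile

@contextlib.contextmanager
def tempdir():
    # Hypothetical stand-in for helpers.tempdir(): yield a scratch
    # directory and remove it once the with-block exits.
    temp = tempfile.mkdtemp()
    try:
        yield temp
    finally:
        shutil.rmtree(temp)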
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0
def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])
def test_semisuper_succeeds():
    """Regression test. Ensure that this can write"""
    with helpers.tempdir() as temp:
        env = gym.make('SemisuperPendulumDecay-v0')
        env = Monitor(env, temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()
def test_no_monitor_reset_unless_done():
    def assert_reset_raises(env):
        errored = False
        try:
            env.reset()
        except error.Error:
            errored = True
        assert errored, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()
def create_env(conf, monitor_on=False):
    env = gym.make(conf['env'])
    if conf['monitor_dir'] != '' and monitor_on:
        env = wrappers.Monitor(env, conf['monitor_dir'], force=True)
    if conf['use_atari_wrapper']:
        env = AtariRescale42x42Wrapper(env)
        env = NormalizeWrapper(env)
    return env
def create_env(conf):
    env = gym.make(conf['env'])
    if conf['monitor_dir']:
        env = wrappers.Monitor(env, conf['monitor_dir'], force=True)
    if conf['use_atari_wrapper']:
        env = AtariRescale42x42Wrapper(env)
        env = NormalizeWrapper(env)
    return env
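A quick usage sketch for `create_env`. The config values below are invented for illustration; the keys ('env', 'monitor_dir', 'use_atari_wrapper') are the ones the function actually reads:

conf = {
    'env': 'Pong-v0',               # any registered gym env id (assumed value)
    'monitor_dir': '/tmp/monitor',  # empty string disables monitoring
    'use_atari_wrapper': True,
}
env = create_env(conf)
state = env.reset()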
def test_no_double_wrapping():
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            pass
        else:
            assert False, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5,
             },
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 100,
             },
        ])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'], results['initial_reset_timestamps'],
            results['episode_lengths'], results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
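`rollout` is a helper defined elsewhere in the test module. A plausible minimal version, guessed from how it is called above (one full episode; `good=True` plays well enough to earn a higher score) — not the original implementation:

def rollout(env, good=False):
    # Hypothetical stand-in for the rollout() helper: run one episode
    # to completion. Alternating actions keeps CartPole balanced far
    # longer than a constant action, so good=True scores higher.
    env.reset()
    action = 0
    done = False
    while not done:
        if good:
            action = 1 - action
        _, _, done, _ = env.step(action)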
def test_monitor_filename():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp)
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1
def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=False)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
def get_demo_data(env):
    # env = wrappers.Monitor(env, '/tmp/CartPole-v0', force=True)
    # agent.restore_model()
    with tf.variable_scope('get_demo_data'):
        agent = DQfDDDQN(env, DDQNConfig())

    e = 0
    while True:
        done = False
        score = 0  # sum of rewards in one episode
        state = env.reset()
        demo = []
        while done is False:
            action = agent.egreedy_action(state)  # epsilon-greedy action for training
            next_state, reward, done, _ = env.step(action)
            score += reward
            reward = reward if not done or score == 499 else -100
            agent.perceive([state, action, reward, next_state, done, 0.0])  # 0.0 marks a non-demo transition
            demo.append([state, action, reward, next_state, done, 1.0])  # record data that may become expert demo data
            agent.train_Q_network(update=False)
            state = next_state
        if done:
            if score == 500:  # a full-length episode counts as expert demo data
                demo = set_n_step(demo, Config.trajectory_n)
                agent.demo_buffer.extend(demo)
            agent.sess.run(agent.update_target_net)
            print("episode:", e, " score:", score, " demo_buffer:", len(agent.demo_buffer),
                  " memory length:", len(agent.replay_buffer), " epsilon:", agent.epsilon)
            if len(agent.demo_buffer) >= Config.demo_buffer_size:
                agent.demo_buffer = deque(itertools.islice(agent.demo_buffer, 0, Config.demo_buffer_size))
                break
        e += 1

    with open(Config.DEMO_DATA_PATH, 'wb') as f:
        pickle.dump(agent.demo_buffer, f, protocol=2)
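`set_n_step` comes from elsewhere in this project. In DQfD it augments each transition with its n-step discounted return so the agent can use an n-step TD target. A hedged sketch of that idea — the tuple layout matches the `[state, action, reward, next_state, done, is_demo]` lists above, but the gamma constant and the exact fields appended are assumptions:

def set_n_step(transitions, n, gamma=0.99):
    # Hypothetical sketch of the n-step rollup applied to expert episodes:
    # for each transition, sum the discounted rewards over the next n
    # steps and remember the state/done flag at the end of that window.
    out = []
    for i, t in enumerate(transitions):
        window = transitions[i:i + n]
        n_return = sum(step[2] * gamma ** k for k, step in enumerate(window))
        last = window[-1]
        # append: n-step return, state n steps ahead, its done flag, and
        # the actual window length (shorter near the end of the episode)
        out.append(t + [n_return, last[3], last[4], len(window)])
    return out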
def init_environment(self, name='CartPole-v0', monitor=False):
    self.env = gym.make(name)
    if monitor:
        self.env = wrappers.Monitor(self.env, name, force=True, video_callable=False)
    self.n_states = self.env.observation_space.shape[0]
    self.n_actions = self.env.action_space.n

    # Experience replay
    self.replay = []
def __init__(self, game_name, histoy_length, render=False):
    self.env = gym.make(game_name)
    # self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
    self.render = render
    self.n_actions = self.env.action_space.n
    self.n_observation = len(self.env.observation_space.high)
    self.resize_width = 80
    self.resize_height = 80
    self.histoy_length = histoy_length  # one state is a stack of 'histoy_length' frames
    self.state_buffer = deque()  # buffer keeps the previous 'histoy_length - 1' frames
    self.show_game_info()
def __init__(self, game_name, histoy_length, render=False):
    self.env = gym.make(game_name)
    # self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
    self.render = render
    self.n_actions = self.env.action_space.n
    self.n_observation = len(self.env.observation_space.high)
    self.histoy_length = histoy_length  # one state is a stack of 'histoy_length' observations
    self.state_buffer = deque()  # buffer keeps the previous 'histoy_length - 1' observations
    self.show_game_info()
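A hedged usage sketch for this wrapper; the class name `Environment` and the env id are assumptions, since the excerpt only shows the constructor:

env = Environment('Breakout-v0', histoy_length=4)  # class name assumed
print(env.n_actions)      # size of the discrete action space
print(env.n_observation)  # length of the raw observation vector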