import os
from collections import defaultdict

# NOTE: gym.benchmark_spec, gym.monitoring, and gym.benchmarks.scoring are only
# present in older gym releases; these snippets assume such a version.
import gym
import gym.benchmarks
import gym.monitoring


def benchmark_score_from_local(benchmark_id, training_dir):
    spec = gym.benchmark_spec(benchmark_id)

    # Collect every subdirectory of training_dir that contains monitor manifests.
    directories = []
    for name, _, files in os.walk(training_dir):
        manifests = gym.monitoring.detect_training_manifests(name, files=files)
        if manifests:
            directories.append(name)

    # Score each evaluation and group the results by environment id.
    benchmark_results = defaultdict(list)
    for training_dir in directories:
        results = gym.monitoring.load_results(training_dir)

        env_id = results['env_info']['env_id']
        benchmark_result = spec.score_evaluation(env_id, results['data_sources'], results['initial_reset_timestamps'], results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'])
        # from pprint import pprint
        # pprint(benchmark_result)
        benchmark_results[env_id].append(benchmark_result)

    return gym.benchmarks.scoring.benchmark_aggregate_score(spec, benchmark_results)
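A minimal usage sketch for the helper above; the benchmark id and directory path are illustrative, and the directory is assumed to contain gym monitor output covering every task and trial the benchmark expects:

# Hypothetical usage (the id and path are examples, not taken from the snippet above).
if __name__ == '__main__':
    aggregate = benchmark_score_from_local('Atari40M', './training_logs')
    # benchmark_aggregate_score returns a summary structure; print it to inspect.
    print(aggregate)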
def main():
    # Games that we'll be testing.
    game_to_ID = {'BeamRider': 0,
                  'Breakout': 1,
                  'Enduro': 2,
                  'Pong': 3,
                  'Qbert': 4}

    # Get some arguments here. Note: the num_timesteps default matches the task's default.
    parser = argparse.ArgumentParser()
    parser.add_argument('--game', type=str, default='Pong')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_timesteps', type=int, default=40000000)
    args = parser.parse_args()

    # Choose the game to play and set the log file.
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[game_to_ID[args.game]]
    log_name = args.game + "_s" + str(args.seed).zfill(3) + ".pkl"

    # Run training. Should change the seed if possible!
    # Also, the actual # of iterations run is _roughly_ num_timesteps/4.
    # get_env, get_session, and atari_learn are project-local helpers, not part of gym.
    seed = args.seed
    env = get_env(task, seed)
    session = get_session()
    print("task = {}".format(task))
    atari_learn(env,
                session,
                num_timesteps=args.num_timesteps,
                log_file=log_name)
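Since game_to_ID in main() hard-codes the task indices, a small sketch like the following (using only attributes that already appear in these snippets) can confirm the index-to-game mapping against the registered benchmark:

# Sanity check for the game_to_ID mapping; assumes a gym version that still
# registers the 'Atari40M' benchmark.
def print_atari40m_tasks():
    benchmark = gym.benchmark_spec('Atari40M')
    for i, task in enumerate(benchmark.tasks):
        print(i, task.env_id, task.max_timesteps)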
def atari_main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # ['BeamRiderNoFrameskip-v4', 'BreakoutNoFrameskip-v4', 'EnduroNoFrameskip-v4',
    #  'PongNoFrameskip-v4', 'QbertNoFrameskip-v4', 'SeaquestNoFrameskip-v4',
    #  'SpaceInvadersNoFrameskip-v4']
    task = benchmark.tasks[1]
    print('available tasks: ', [t.env_id for t in benchmark.tasks])
    print('task: ', task.env_id, 'max steps: ', task.max_timesteps)

    # Run training. get_env, PiecewiseSchedule, and DoubleDQN are project-local helpers.
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    last_obs = env.reset()

    exploration_schedule = PiecewiseSchedule(
        [
            (0, 1.0),
            (1e6, 0.1),
            (task.max_timesteps / 2, 0.01),
        ], outside_value=0.01
    )

    dqn = DoubleDQN(image_shape=(84, 84, 1),
                    num_actions=env.action_space.n,
                    training_starts=50000,
                    target_update_freq=10000,
                    training_batch_size=32,
                    # training_starts=2000,
                    # target_update_freq=500,
                    # training_batch_size=3,
                    exploration=exploration_schedule
                    )

    reward_sum_episode = 0
    num_episodes = 0
    episode_rewards = deque(maxlen=100)
    for step in range(task.max_timesteps):
        # Periodic progress logging.
        if step > 0 and step % 1000 == 0:
            print('step: ', step, 'episodes:', num_episodes, 'epsilon:', exploration_schedule.value(step),
                  'learning rate:', dqn.get_learning_rate(), 'last 100 training loss mean', dqn.get_avg_loss(),
                  'last 100 episode mean rewards: ', np.mean(np.array(episode_rewards, dtype=np.float32)))

        env.render()
        action = dqn.choose_action(step, last_obs)
        obs, reward, done, info = env.step(action)
        reward_sum_episode += reward
        dqn.learn(step, action, reward, done, info)
        if done:
            last_obs = env.reset()
            episode_rewards.append(reward_sum_episode)
            reward_sum_episode = 0
            num_episodes += 1
        else:
            last_obs = obs
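The exploration_schedule in atari_main() is a piecewise-linear epsilon decay. The standalone sketch below reproduces the values it is assumed to yield, on the assumption that PiecewiseSchedule (a project-local, baselines-style helper) interpolates linearly between breakpoints and returns outside_value beyond the last one:

def epsilon_at(step, max_timesteps):
    # Breakpoints mirror exploration_schedule above; the linear interpolation is
    # an assumption about PiecewiseSchedule's behaviour, not a gym API.
    endpoints = [(0, 1.0), (1e6, 0.1), (max_timesteps / 2, 0.01)]
    for (l_t, l_v), (r_t, r_v) in zip(endpoints[:-1], endpoints[1:]):
        if l_t <= step < r_t:
            alpha = (step - l_t) / (r_t - l_t)
            return l_v + alpha * (r_v - l_v)
    return 0.01  # outside_value

# e.g. epsilon_at(500_000, 40_000_000) -> 0.55, epsilon_at(30_000_000, 40_000_000) -> 0.01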
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key, ignore_open_monitors, skip_videos):
    # We're uploading a benchmark run.
    directories = []
    env_ids = []
    for name, _, files in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(name, files=files)
        if manifests:
            env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
            env_ids.append(env_info['env_id'])
            directories.append(name)

    # Validate against benchmark spec
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

    spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]

    if not env_ids:
        raise error.Error("Could not find any evaluations in {}".format(training_dir))

    # This could be more stringent about mixing evaluations
    if sorted(env_ids) != sorted(spec_env_ids):
        logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids))

    tags = json.dumps(benchmark_run_tags)
    _create_with_retries = util.retry_exponential_backoff(
        resource.BenchmarkRun.create,
        (error.APIConnectionError,),
        max_retries=5,
        interval=3,
    )
    benchmark_run = _create_with_retries(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=tags)
    benchmark_run_id = benchmark_run.id

    # Actually do the uploads.
    for training_dir in directories:
        # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
        _upload_with_retries = util.retry_exponential_backoff(
            _upload,
            (error.APIConnectionError,),
            max_retries=5,
            interval=3,
        )
        _upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)

    logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
    """.rstrip(), benchmark_id, benchmark_run.web_url())

    return benchmark_run_id
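For completeness, a hedged sketch of calling the uploader above directly; every argument value here is a placeholder, and in practice this helper would normally be driven by a higher-level upload routine rather than called by hand:

# Illustrative call only: the directory, algorithm id, and api key are placeholders.
run_id = _upload_benchmark(
    training_dir='/tmp/atari40m_runs',  # directory tree containing monitor manifests
    algorithm_id=None,                  # placeholder; not propagated to individual evaluations
    benchmark_id='Atari40M',            # must be registered in gym/benchmarks/__init__.py
    benchmark_run_tags={},              # serialized to JSON and attached to the run
    api_key=None,                       # placeholder for an OpenAI Gym API key
    ignore_open_monitors=False,
    skip_videos=False,
)
print(run_id)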