def main():
    """Parse CLI flags, train an Atari DQN model, and report final-layer norms.

    Flags are read via docopt from the module docstring: environment id,
    model name, filter count, batch size, timestep budget, checkpoint
    directory, restore flag, and learning-start offset.  After training,
    the checkpoint written by ``atari_learn`` is re-read and the
    fully-connected action-value weight matrix is summarized.
    """
    arguments = docopt.docopt(__doc__)

    # Fixed seed for reproducibility of a single run; randomize for sweeps.
    seed = 0
    env = get_env(arguments['--envid'], seed)
    with get_session() as session:
        model = arguments['--model'].lower()
        num_filters = int(arguments['--num-filters'])
        batch_size = int(arguments['--batch-size'])
        print(' * [INFO] %s model (Filters: %d, Batch Size: %d)' % (
            model, num_filters, batch_size))
        save_path = atari_learn(
            env,
            session,
            num_timesteps=int(arguments['--timesteps']),
            num_filters=num_filters,
            model=model,
            batch_size=batch_size,
            restore=arguments['--restore'],
            checkpoint_dir=arguments['--ckpt-dir'],
            # NOTE(review): passed as the raw docopt string, unlike the
            # other numeric flags above which are int()-converted here —
            # confirm atari_learn coerces it (or convert here if not).
            learning_starts=arguments['--learning-starts'])
        # Inspect the trained Q-network's final fully-connected weights
        # directly from the checkpoint on disk.
        reader = tf.train.NewCheckpointReader(save_path)
        W = reader.get_tensor('q_func/action_value/fully_connected/weights')
        print('Largest entry:', np.linalg.norm(W, ord=np.inf))
        print('Frobenius norm:', np.linalg.norm(W, ord='fro'))
# (removed: stray web-page navigation text — "评论列表"/"文章目录", i.e.
# "comment list"/"article table of contents" — scraped in with the code)