def __init__(self, session,
             optimizer,
             policy_network,
             state_dim,
             num_actions,
             init_exp=0.5,          # initial exploration prob
             final_exp=0.0,         # final exploration prob
             anneal_steps=10000,    # N steps for annealing exploration
             discount_factor=0.99,  # discount future rewards
             reg_param=0.001,       # regularization constants
             max_gradient=5,        # max gradient norms
             summary_writer=None,
             summary_every=100):
    """Store the configuration, create the graph variables and initialize them.

    Args:
        session: TensorFlow session used to run the variable initializer
            and the initialization check; its graph is passed to the
            summary writer when one is given.
        optimizer: stored unchanged on the instance.
        policy_network: stored unchanged on the instance.
        state_dim: stored unchanged on the instance.
        num_actions: stored unchanged on the instance.
        init_exp: initial exploration probability; also the starting
            value of ``self.exploration``.
        final_exp: final exploration probability.
        anneal_steps: number of steps for annealing exploration.
        discount_factor: discount applied to future rewards.
        reg_param: regularization constant.
        max_gradient: maximum gradient norm.
        summary_writer: optional writer; when not ``None`` the session
            graph is added to it.
        summary_every: stored on the instance; presumably the interval
            between summary writes -- confirm against the training code.
    """
    # tensorflow machinery
    self.session = session
    self.optimizer = optimizer
    self.summary_writer = summary_writer

    # model components
    self.policy_network = policy_network

    # training parameters
    self.state_dim = state_dim
    self.num_actions = num_actions
    self.discount_factor = discount_factor
    self.max_gradient = max_gradient
    self.reg_param = reg_param

    # exploration parameters (current exploration starts at init_exp)
    self.exploration = init_exp
    self.init_exp = init_exp
    self.final_exp = final_exp
    self.anneal_steps = anneal_steps

    # counters
    self.train_iteration = 0

    # rollout buffer
    self.state_buffer = []
    self.reward_buffer = []
    self.action_buffer = []

    # record reward history for normalization
    self.all_rewards = []
    # NOTE(review): looks like a cap on len(all_rewards) -- confirm where used
    self.max_reward_length = 1000000

    # create and initialize variables (create_variables is defined elsewhere
    # on the class and must run before the global-variable collection is read)
    self.create_variables()
    var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    self.session.run(tf.variables_initializer(var_lists))

    # make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())

    if self.summary_writer is not None:
        # graph was not available when journalist was created
        self.summary_writer.add_graph(self.session.graph)

    # Assigned unconditionally so the attribute exists even when no summary
    # writer was supplied.
    self.summary_every = summary_every
# Comment list
# Table of contents