def __init__(self, session,
             optimizer,
             actor_network,
             critic_network,
             state_dim,
             action_dim,
             batch_size=32,
             replay_buffer_size=1000000,  # size of replay buffer
             store_replay_every=1,        # how frequent to store experience
             discount_factor=0.99,        # discount future rewards
             target_update_rate=0.01,
             reg_param=0.01,              # regularization constants
             max_gradient=5,              # max gradient norms
             noise_sigma=0.20,
             noise_theta=0.15,
             summary_writer=None,
             summary_every=100):
    """Store the agent's configuration, build its graph and initialize variables.

    Besides recording its arguments on ``self``, the constructor creates a
    replay buffer and an exploration-noise op, calls
    ``self.create_variables()``, and then runs an initializer in ``session``
    for every variable in the ``GLOBAL_VARIABLES`` collection.

    Args:
        session: TensorFlow session used to run the initialization ops.
        optimizer: Stored as ``self.optimizer``; not used in this method.
        actor_network: Stored as ``self.actor_network``; not used here.
        critic_network: Stored as ``self.critic_network``; not used here.
        state_dim: Stored as ``self.state_dim``.
        action_dim: Stored as ``self.action_dim``; also the width of the
            ``[1, action_dim]`` exploration-noise tensor.
        batch_size: Stored as ``self.batch_size``.
        replay_buffer_size: Passed to ``ReplayBuffer`` as ``buffer_size``.
        store_replay_every: How frequently to store experience.
        discount_factor: Discount applied to future rewards.
        target_update_rate: Stored as ``self.target_update_rate``
            (presumably the target-network update rate -- confirm against
            the code that consumes it).
        reg_param: Regularization constant.
        max_gradient: Maximum gradient norm.
        noise_sigma: Standard deviation of the Gaussian term of the
            exploration noise.
        noise_theta: Coefficient pulling the exploration noise back
            towards zero on every step.
        summary_writer: Optional writer; when given, the session graph is
            added to it.
        summary_every: Stored as ``self.summary_every`` regardless of
            whether ``summary_writer`` is supplied.
    """
    # tensorflow machinery
    self.session = session
    self.optimizer = optimizer
    self.summary_writer = summary_writer

    # model components
    self.actor_network = actor_network
    self.critic_network = critic_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)

    # training parameters
    self.batch_size = batch_size
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.discount_factor = discount_factor
    self.target_update_rate = target_update_rate
    self.max_gradient = max_gradient
    self.reg_param = reg_param

    # Ornstein-Uhlenbeck noise for exploration.
    # Each evaluation of self.noise updates noise_var in place:
    #   noise_var <- noise_var - (theta * noise_var - eps),  eps ~ N(0, sigma)
    self.noise_var = tf.Variable(tf.zeros([1, action_dim]))
    noise_random = tf.random_normal([1, action_dim], stddev=noise_sigma)
    self.noise = self.noise_var.assign_sub(
        (noise_theta) * self.noise_var - noise_random)

    # counters
    self.store_replay_every = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration = 0
    # Set unconditionally so the attribute always exists, even when no
    # summary writer is supplied.
    self.summary_every = summary_every

    # create and initialize variables
    self.create_variables()
    # NOTE: this initializes everything in the GLOBAL_VARIABLES collection,
    # not only the variables created by this object.
    var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    self.session.run(tf.variables_initializer(var_lists))

    # make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())

    if self.summary_writer is not None:
        # graph was not available when journalist was created
        self.summary_writer.add_graph(self.session.graph)
评论列表
文章目录