def __init__(self, num_states, num_actions):
    self.g = tf.Graph()
    with self.g.as_default():
        self.sess = tf.InteractiveSession()
        # Actor network model parameters:
        self.W1_a, self.B1_a, self.W2_a, self.B2_a, self.W3_a, self.B3_a, \
            self.actor_state_in, self.actor_model = self.create_actor_net(num_states, num_actions)

        # Target actor network model parameters:
        self.t_W1_a, self.t_B1_a, self.t_W2_a, self.t_B2_a, self.t_W3_a, self.t_B3_a, \
            self.t_actor_state_in, self.t_actor_model = self.create_actor_net(num_states, num_actions)
        # Cost of the actor network. q_gradient_input receives dQ/da,
        # the action gradient computed in the critic network file:
        self.q_gradient_input = tf.placeholder(tf.float32, [None, num_actions])
        self.actor_parameters = [self.W1_a, self.B1_a, self.W2_a, self.B2_a, self.W3_a, self.B3_a]
        # Chain rule: gradients of the actor output w.r.t. its parameters,
        # weighted by -dQ/da (optionally divide by BATCH_SIZE to average over the batch):
        self.parameters_gradients = tf.gradients(
            self.actor_model, self.actor_parameters, -self.q_gradient_input)
        self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(
            zip(self.parameters_gradients, self.actor_parameters))
        # Initialize all tensor variable parameters
        # (tf.initialize_all_variables() in pre-0.12 TensorFlow):
        self.sess.run(tf.global_variables_initializer())
        # Copy the parameters once so the actor and target actor networks
        # start with the same initial parameters:
        self.sess.run([
            self.t_W1_a.assign(self.W1_a),
            self.t_B1_a.assign(self.B1_a),
            self.t_W2_a.assign(self.W2_a),
            self.t_B2_a.assign(self.B2_a),
            self.t_W3_a.assign(self.W3_a),
            self.t_B3_a.assign(self.B3_a)])
        # Soft-update op for the target network: theta_t <- TAU*theta + (1-TAU)*theta_t
        self.update_target_actor_op = [
            self.t_W1_a.assign(TAU*self.W1_a + (1-TAU)*self.t_W1_a),
            self.t_B1_a.assign(TAU*self.B1_a + (1-TAU)*self.t_B1_a),
            self.t_W2_a.assign(TAU*self.W2_a + (1-TAU)*self.t_W2_a),
            self.t_B2_a.assign(TAU*self.B2_a + (1-TAU)*self.t_B2_a),
            self.t_W3_a.assign(TAU*self.W3_a + (1-TAU)*self.t_W3_a),
            self.t_B3_a.assign(TAU*self.B3_a + (1-TAU)*self.t_B3_a)]
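The constructor assumes create_actor_net returns six weight/bias variables plus the state placeholder and the network's output tensor. Here is a minimal sketch of such a method under a common two-hidden-layer DDPG layout; the hidden widths, activations, and fan-in initialization are illustrative assumptions, not taken from this article, and it additionally needs import math at the top of the file:

# Hypothetical sketch of create_actor_net; sizes and activations are assumptions.
def create_actor_net(self, num_states, num_actions):
    N_HIDDEN_1 = 400  # assumed width of first hidden layer
    N_HIDDEN_2 = 300  # assumed width of second hidden layer
    actor_state_in = tf.placeholder(tf.float32, [None, num_states])
    # Fan-in uniform initialization for hidden layers, small uniform for output:
    W1_a = tf.Variable(tf.random_uniform([num_states, N_HIDDEN_1],
                                         -1/math.sqrt(num_states), 1/math.sqrt(num_states)))
    B1_a = tf.Variable(tf.random_uniform([N_HIDDEN_1],
                                         -1/math.sqrt(num_states), 1/math.sqrt(num_states)))
    W2_a = tf.Variable(tf.random_uniform([N_HIDDEN_1, N_HIDDEN_2],
                                         -1/math.sqrt(N_HIDDEN_1), 1/math.sqrt(N_HIDDEN_1)))
    B2_a = tf.Variable(tf.random_uniform([N_HIDDEN_2],
                                         -1/math.sqrt(N_HIDDEN_1), 1/math.sqrt(N_HIDDEN_1)))
    W3_a = tf.Variable(tf.random_uniform([N_HIDDEN_2, num_actions], -0.003, 0.003))
    B3_a = tf.Variable(tf.random_uniform([num_actions], -0.003, 0.003))
    H1_a = tf.nn.softplus(tf.matmul(actor_state_in, W1_a) + B1_a)
    H2_a = tf.nn.tanh(tf.matmul(H1_a, W2_a) + B2_a)
    actor_model = tf.matmul(H2_a, W3_a) + B3_a
    return W1_a, B1_a, W2_a, B2_a, W3_a, B3_a, actor_state_in, actor_model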
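For context, here is a hedged sketch of the companion methods that typically drive these ops from the training loop. The names evaluate_actor, train_actor, and update_target_actor are assumptions in the style of the fields above, not confirmed by this section:

# Hypothetical companion methods of the same class; names are assumptions.
def evaluate_actor(self, state_batch):
    # Deterministic policy forward pass: a = mu(s).
    return self.sess.run(self.actor_model,
                         feed_dict={self.actor_state_in: state_batch})

def train_actor(self, state_batch, q_gradient_batch):
    # q_gradient_batch is dQ/da from the critic; feeding it into
    # q_gradient_input applies the chained policy gradient built in __init__.
    self.sess.run(self.optimizer,
                  feed_dict={self.actor_state_in: state_batch,
                             self.q_gradient_input: q_gradient_batch})

def update_target_actor(self):
    # Soft update: theta_target <- TAU*theta + (1-TAU)*theta_target.
    self.sess.run(self.update_target_actor_op)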