# actor_network.py
import tensorflow as tf

LEARNING_RATE = 1e-4  # assumed value; the constant is defined elsewhere in the original file


class ActorNetwork:
    def __init__(self, sess, state_dim, action_dim, scope):
        self.state_dim = state_dim
        self.action_dim = action_dim
        # create actor network
        self.state_input, self.action_output, self.net = \
            self.create_network(state_dim, action_dim, scope)
        # create target actor network
        self.target_state_input, self.target_action_output, self.target_update, self.target_net = \
            self.create_target_network(state_dim, action_dim, self.net, scope)
        # define training rules: only worker (non-global) actors build an optimizer
        if scope != 'global/actor':
            # dQ/da, supplied by the critic at training time
            self.q_gradient_input = tf.placeholder(tf.float32, [None, self.action_dim])
            # chain rule: dQ/dtheta = (dQ/da) * (da/dtheta); the sign is flipped
            # so that Adam's descent step performs gradient ascent on Q
            self.parameters_gradients = tf.gradients(
                self.action_output, self.net, -self.q_gradient_input)
            # apply the locally computed gradients to the shared global actor (A3C-style)
            global_vars_actor = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global/actor')
            self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(
                zip(self.parameters_gradients, global_vars_actor))
        sess.run(tf.global_variables_initializer())
        # self.update_target()
        # self.load_network()
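
The negated gradient feed implements the deterministic policy gradient: grad_theta J ~ (1/N) * sum_i dQ(s_i, a)/da at a = mu(s_i), multiplied by dmu(s_i)/dtheta, so each worker computes policy gradients locally and applies them to the shared 'global/actor' variables. Below is a minimal wiring sketch under that assumption; the critic that produces the dQ/da batch and the state_batch/q_gradient_batch arrays are hypothetical and not part of this file.

# hypothetical usage sketch (TF1): the data batches and the critic mentioned
# below are assumptions for illustration, not part of actor_network.py
sess = tf.Session()
global_actor = ActorNetwork(sess, state_dim=3, action_dim=1, scope='global/actor')
worker_actor = ActorNetwork(sess, state_dim=3, action_dim=1, scope='worker_0/actor')

# state_batch: [N, state_dim] states; q_gradient_batch: [N, action_dim] dQ/da
# values computed by an assumed critic network on the same batch
sess.run(worker_actor.optimizer, feed_dict={
    worker_actor.state_input: state_batch,
    worker_actor.q_gradient_input: q_gradient_batch,
})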