def create_q_network(self, state_dim, action_dim, scope):
    """Build a recurrent critic (Q) network over (state, action) sequences.

    Args:
        state_dim: size of the per-timestep state vector.
        action_dim: size of the per-timestep action vector.
        scope: variable-scope name; also used to select this network's
            trainable variables by substring match.

    Returns:
        A tuple ``(state_input, action_input, q_value_output, net)`` where
        the inputs are ``[batch, time, dim]`` placeholders, ``q_value_output``
        is the Q value for every timestep (``[batch, time, 1]``), and ``net``
        is the list of this scope's trainable variables.
    """
    # The layer size could be changed.
    with tf.variable_scope(scope, reuse=False):
        # Placeholders take [batch, time, feature] sequences (both batch and
        # sequence length are dynamic).
        state_input = tf.placeholder("float", [None, None, state_dim])
        action_input = tf.placeholder("float", [None, None, action_dim])

        # Recurrent part: run an LSTM over the concatenated (state, action)
        # sequence; concat axis 2 is the feature axis.
        lstm_cell = rnn.BasicLSTMCell(LSTM_HIDDEN_UNIT)
        lstm_output, lstm_state = tf.nn.dynamic_rnn(
            cell=lstm_cell,
            inputs=tf.concat([state_input, action_input], 2),
            dtype=tf.float32)

        # Final linear layer; small uniform init in (-3e-3, 3e-3) keeps the
        # initial Q estimates near zero (DDPG-style output-layer init).
        W3 = tf.Variable(tf.random_uniform([lstm_cell.output_size, 1], -3e-3, 3e-3))
        b3 = tf.Variable(tf.random_uniform([1], -3e-3, 3e-3))

        # BUG FIX: the original computed `tf.matmul(layer2, W3)` but `layer2`
        # is undefined (NameError) — presumably a leftover from a feedforward
        # version of this critic. The linear head is applied to the LSTM
        # output at every timestep; since `lstm_output` is 3-D and `W3` is
        # 2-D, contract explicitly over the feature axis with tensordot.
        # (The original `tf.identity` wrapper was a no-op and is dropped.)
        q_value_output = tf.tensordot(lstm_output, W3, axes=[[2], [0]]) + b3

        # Collect only this scope's trainable variables so target/online
        # networks built under different scopes stay separate.
        net = [v for v in tf.trainable_variables() if scope in v.name]
        return state_input, action_input, q_value_output, net
critic_network.py — file source code
python
Reads: 32
Favorites: 0
Likes: 0
Comments: 0
Comment list
Table of contents