def create_q_network(self, state_dim, action_dim, scope):
    """Build a recurrent critic (Q) network over (state, action) sequences.

    Args:
        state_dim: size of the per-timestep state vector.
        action_dim: size of the per-timestep action vector.
        scope: variable-scope name; also used to select this network's
            trainable variables by substring match.

    Returns:
        A tuple ``(state_input, action_input, q_value_output, net)`` where
        the inputs are ``[batch, time, dim]`` placeholders, ``q_value_output``
        is the Q value for every timestep (``[batch, time, 1]``), and ``net``
        is the list of this scope's trainable variables.
    """
    # The layer size could be changed.
    with tf.variable_scope(scope, reuse=False):
        # Placeholders take [batch, time, feature] sequences (both batch and
        # sequence length are dynamic).
        state_input = tf.placeholder("float", [None, None, state_dim])
        action_input = tf.placeholder("float", [None, None, action_dim])

        # Recurrent part: run an LSTM over the concatenated (state, action)
        # sequence; concat axis 2 is the feature axis.
        lstm_cell = rnn.BasicLSTMCell(LSTM_HIDDEN_UNIT)
        lstm_output, lstm_state = tf.nn.dynamic_rnn(
            cell=lstm_cell,
            inputs=tf.concat([state_input, action_input], 2),
            dtype=tf.float32)

        # Final linear layer; small uniform init in (-3e-3, 3e-3) keeps the
        # initial Q estimates near zero (DDPG-style output-layer init).
        W3 = tf.Variable(tf.random_uniform([lstm_cell.output_size, 1], -3e-3, 3e-3))
        b3 = tf.Variable(tf.random_uniform([1], -3e-3, 3e-3))

        # BUG FIX: the original computed `tf.matmul(layer2, W3)` but `layer2`
        # is undefined (NameError) — presumably a leftover from a feedforward
        # version of this critic. The linear head is applied to the LSTM
        # output at every timestep; since `lstm_output` is 3-D and `W3` is
        # 2-D, contract explicitly over the feature axis with tensordot.
        # (The original `tf.identity` wrapper was a no-op and is dropped.)
        q_value_output = tf.tensordot(lstm_output, W3, axes=[[2], [0]]) + b3

        # Collect only this scope's trainable variables so target/online
        # networks built under different scopes stay separate.
        net = [v for v in tf.trainable_variables() if scope in v.name]
        return state_input, action_input, q_value_output, net
critic_network.py — file source code
python
Reads: 32
Favorites: 0
Likes: 0
Comments: 0
Comment list
Table of contents