def create_critic_net(self, num_states=4, num_actions=1):
    """Assemble the critic network graph and return its variables and tensors.

    The state input feeds a 400-unit softplus layer. That layer's output and
    the action input are each projected to 300 units, summed together with a
    bias and passed through tanh. A final linear layer maps the result to a
    single output column.

    Args:
        num_states: Width (second dimension) of the state placeholder.
        num_actions: Width (second dimension) of the action placeholder.

    Returns:
        A 10-tuple, in this order: first-layer weights, first-layer bias,
        second-layer weights for the hidden path, second-layer weights for
        the action path, second-layer bias, output weights, output bias,
        the output tensor, the state placeholder, the action placeholder.
    """
    hidden1_units = 400
    hidden2_units = 300

    def uniform_variable(shape, bound):
        # Trainable variable initialised by tf.random_uniform with the
        # symmetric limits -bound and bound.
        return tf.Variable(tf.random_uniform(shape, -bound, bound))

    # Inputs: the leading dimension is left open (None).
    state_input = tf.placeholder("float", [None, num_states])
    action_input = tf.placeholder("float", [None, num_actions])

    # First layer parameters; the init limit is 1/sqrt(num_states).
    bound1 = 1 / math.sqrt(num_states)
    w_state = uniform_variable([num_states, hidden1_units], bound1)
    b_state = uniform_variable([hidden1_units], bound1)

    # Second layer parameters; the init limit is based on the combined
    # width of the first hidden layer and the action input.
    bound2 = 1 / math.sqrt(hidden1_units + num_actions)
    w_hidden = uniform_variable([hidden1_units, hidden2_units], bound2)
    w_action = uniform_variable([num_actions, hidden2_units], bound2)
    b_hidden = uniform_variable([hidden2_units], bound2)

    # Output layer parameters use a fixed, small init limit.
    w_out = uniform_variable([hidden2_units, 1], 0.003)
    b_out = uniform_variable([1], 0.003)

    # Forward pass.
    hidden1 = tf.nn.softplus(tf.matmul(state_input, w_state) + b_state)
    state_term = tf.matmul(hidden1, w_hidden)
    action_term = tf.matmul(action_input, w_action)
    hidden2 = tf.nn.tanh(state_term + action_term + b_hidden)
    q_output = tf.matmul(hidden2, w_out) + b_out

    return (
        w_state,
        b_state,
        w_hidden,
        w_action,
        b_hidden,
        w_out,
        b_out,
        q_output,
        state_input,
        action_input,
    )
# Comment list
# Article table of contents