def create_actor_net(self, num_states=4, num_actions=1):
    """Build the actor graph mapping a batch of states to actions.

    The graph is a state placeholder of shape ``[None, num_states]``
    followed by two hidden layers (400 units with softplus, 300 units
    with tanh) and a linear output layer of width ``num_actions``.

    Weights and biases of each hidden layer are drawn uniformly from
    ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``, where ``fan_in`` is the layer's
    input width; the output layer uses ``[-0.003, 0.003]``.

    Args:
        num_states: Width of the state input.
        num_actions: Width of the action output.

    Returns:
        An 8-tuple ``(W1, B1, W2, B2, W3, B3, state_placeholder, output)``:
        the six layer variables in layer order, the input placeholder,
        and the output tensor.
    """
    hidden1_units = 400
    hidden2_units = 300

    # Half-widths of the symmetric uniform initialisation ranges.
    limit_layer1 = 1 / math.sqrt(num_states)
    limit_layer2 = 1 / math.sqrt(hidden1_units)
    limit_output = 0.003

    def uniform_variable(shape, limit):
        # Variable initialised uniformly in [-limit, limit].
        return tf.Variable(tf.random_uniform(shape, -limit, limit))

    state_input = tf.placeholder("float", [None, num_states])

    # Variables are created in the same order as they are returned
    # (W1, B1, W2, B2, W3, B3) so auto-generated graph names stay stable.
    weights1 = uniform_variable([num_states, hidden1_units], limit_layer1)
    bias1 = uniform_variable([hidden1_units], limit_layer1)
    weights2 = uniform_variable([hidden1_units, hidden2_units], limit_layer2)
    bias2 = uniform_variable([hidden2_units], limit_layer2)
    weights3 = uniform_variable([hidden2_units, num_actions], limit_output)
    bias3 = uniform_variable([num_actions], limit_output)

    # Forward pass: softplus -> tanh -> linear output.
    pre_activation1 = tf.matmul(state_input, weights1) + bias1
    hidden1 = tf.nn.softplus(pre_activation1)
    pre_activation2 = tf.matmul(hidden1, weights2) + bias2
    hidden2 = tf.nn.tanh(pre_activation2)
    action_output = tf.matmul(hidden2, weights3) + bias3

    return (
        weights1,
        bias1,
        weights2,
        bias2,
        weights3,
        bias3,
        state_input,
        action_output,
    )
# Comment list
# Table of contents