def _network(self):
    w_init = tf.random_normal_initializer(0., .1)
    # actor part
    # return mu & sigma to parameterize the action normal distribution
    scope_var = "actor"
    mu, sigma = net_frame.mlp_frame([200], self.state, self.action_dim, scope_var,
                                    activation_fn=tf.nn.relu6, w_init=w_init,
                                    activation_fn_v=tf.nn.tanh,
                                    activation_fn_a=tf.nn.softplus, continu=True)
    # critic part
    # return the value of the state
    scope_var = "critic"
    v = net_frame.mlp_frame([100], self.state, 1, scope_var,
                            activation_fn=tf.nn.relu6)
    return mu, sigma, v
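# ---------------------------------------------------------------
# Hedged usage sketch (not from the original source): how the mu
# and sigma returned by _network are typically turned into a
# sampled, clipped continuous action in TF1-style actor-critic
# code. tf.distributions.Normal is part of TF 1.x; `action_bound`
# is an assumed hyperparameter, not taken from this file.
# ---------------------------------------------------------------
def build_action_dist(mu, sigma, action_bound):
    # scale the mean into the action range; softplus already keeps
    # sigma positive, the small constant adds numerical stability
    mu = mu * action_bound
    sigma = sigma + 1e-4
    normal_dist = tf.distributions.Normal(mu, sigma)
    # sample one action and clip it back into the valid range
    action = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0),
                              -action_bound, action_bound)
    return normal_dist, action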
# ===============================================================
# DDPG Agent
# ===============================================================