import tensorflow as tf
import tensorflow.contrib.layers as layers  # TF 1.x contrib layers API used throughout


# Head that outputs the parameters of a Gaussian (normal) action distribution:
# `mu` is the mean and `sigma` the standard deviation, one per action dimension.
def _action_norm_dist(inpt, num_actions, w_init, activation_fn_v, activation_fn_a):
    mu = layers.fully_connected(inpt, num_outputs=num_actions, weights_initializer=w_init, activation_fn=activation_fn_v)
    sigma = layers.fully_connected(inpt, num_outputs=num_actions, weights_initializer=w_init, activation_fn=activation_fn_a)
    return mu, sigma
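
# A minimal usage sketch (not from the original code; `state_ph`, the sizes,
# and the epsilon are assumptions) showing how the mu/sigma pair typically
# parameterizes a Gaussian policy over continuous actions:
#
#     state_ph = tf.placeholder(tf.float32, [None, 4])   # hypothetical state input
#     w_init = tf.truncated_normal_initializer(0.0, 0.3)
#     mu, sigma = _action_norm_dist(state_ph, num_actions=2, w_init=w_init,
#                                   activation_fn_v=None,            # linear mean
#                                   activation_fn_a=tf.nn.softplus)  # positive std
#     dist = tf.distributions.Normal(loc=mu, scale=sigma + 1e-5)  # epsilon for stability
#     action = dist.sample()            # stochastic action for exploration
#     log_prob = dist.log_prob(action)  # used by policy-gradient losses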
# # cnn network frame: conv stack followed by a Gaussian (mu, sigma) head
# def cnn_frame_continu(hiddens, kernels, strides, inpt, num_actions, scope=None, activation_fn=tf.nn.relu, activation_fn_mu=tf.nn.relu, activation_fn_sigma=tf.nn.relu, reuse=None):
#     with tf.variable_scope(scope, reuse=reuse):
#         out = inpt
#         # each kernel is a 4-D filter tensor, each stride a [1, s, s, 1] list
#         for kernel, stride in zip(kernels, strides):
#             out = activation_fn(tf.nn.conv2d(input=out, filter=kernel, strides=stride, padding="SAME"))
#         out = layers.flatten(out)
#         with tf.name_scope("out"):
#             mu = layers.fully_connected(out, num_outputs=num_actions, weights_initializer=tf.truncated_normal_initializer(0.0, 0.3), activation_fn=None)
#             sigma = layers.fully_connected(out, num_outputs=num_actions, weights_initializer=tf.truncated_normal_initializer(0.0, 0.3), activation_fn=tf.nn.softplus)
#         return mu, sigma
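
# A sketch of a more idiomatic variant of the frame above (the name
# `cnn_frame_continu_v2` and its parameters are assumptions, not part of the
# original): tf.contrib.layers creates the filter variables itself, so the
# caller passes filter counts and kernel sizes rather than prebuilt tensors.
def cnn_frame_continu_v2(hiddens, num_filters, kernel_sizes, strides, inpt,
                         num_actions, scope="cnn_frame", reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        # convolutional stack: one layer per (filters, kernel, stride) triple
        for n, k, s in zip(num_filters, kernel_sizes, strides):
            out = layers.convolution2d(out, num_outputs=n, kernel_size=k,
                                       stride=s, activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        # fully connected trunk
        for h in hiddens:
            out = layers.fully_connected(out, num_outputs=h, activation_fn=tf.nn.relu)
        # Gaussian head: linear mean, softplus keeps the std positive
        mu = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        sigma = layers.fully_connected(out, num_outputs=num_actions, activation_fn=tf.nn.softplus)
        return mu, sigma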