a3c.py 文件源码-python代码片段

def __init__(self, input_space, output_space, trainable=True):
        """Build the network graph: shared body, value output and A3C header.

        Creates a float32 input placeholder shaped after ``input_space``, runs
        it through ``make_dqn_body``, adds a 256-unit ReLU layer (``fc1``) with
        a single-unit linear value output (``out_value``) on top of it, and
        merges in the end points returned by ``make_a3c_header``.

        Args:
            input_space: Space describing the network input; its ``shape``
                gives the non-batch dimensions of the input placeholder.
                Must not be a ``Tuple`` space.
            output_space: Space describing the network output; forwarded to
                ``make_a3c_header``. Must not be a ``Tuple`` space.
            trainable: Forwarded as the ``trainable`` flag to every layer
                created here and to the body/header builders.

        Raises:
            ValueError: If ``input_space`` or ``output_space`` is a ``Tuple``.
        """
        if isinstance(input_space, Tuple) or isinstance(output_space, Tuple):
            raise ValueError('For tuple action and observation spaces '
                             'consider implementing custom network architecture.')

        # Leading `None` leaves the batch dimension dynamic.
        self._input_ph = tf.placeholder('float32',
                                        shape=[None] + list(input_space.shape),
                                        name='inputs')
        # NOTE(review): `self.input_ph` is read here but only `self._input_ph`
        # is assigned in this method -- presumably a property defined elsewhere
        # in the class hierarchy; confirm.
        net, end_points = make_dqn_body(self.input_ph, trainable)
        end_points['fc1'] = layers.fully_connected(net, num_outputs=256,
                                                   activation_fn=tf.nn.relu,
                                                   scope='fc1',
                                                   trainable=trainable)

        # Value output: one linear unit on top of `fc1`, with normally
        # distributed initial weights and biases.
        gaussian = tf.random_normal_initializer
        v = layers.fully_connected(end_points['fc1'], num_outputs=1,
                                   activation_fn=None,
                                   weights_initializer=gaussian(0.0, 0.1),
                                   biases_initializer=gaussian(0.05, 0.1),
                                   scope='out_value',
                                   trainable=trainable)
        # Drop only the trailing unit axis produced by `num_outputs=1`.
        # An unqualified `tf.squeeze(v)` removes *every* size-1 dimension, so
        # a batch holding a single sample would collapse to a scalar and lose
        # its batch axis.
        end_points['out_value'] = tf.squeeze(v, axis=[-1])

        # The header is built from the body output `net`, not from `fc1`.
        header_endpoints = make_a3c_header(net, input_space, output_space, trainable)
        end_points.update(header_endpoints)
        self.end_points = end_points
        # NOTE(review): `self.output` is not assigned in this method --
        # presumably an accessor defined elsewhere in the class; confirm.
        self.output_policy = self.output