dueling.py 文件源码-python代码片段

dueling.py 文件源码

python

阅读 18 收藏 0 点赞 0 评论 0

项目：reinforceflow 作者: dbobrenko 项目源码文件源码

def __init__(self, input_space, output_space, layer_sizes=(512, 512), dueling_type='mean',
             advantage_layers=(256,), value_layers=(256,), trainable=True):
        """Build a fully connected network topped with a dueling header.

        The input placeholder is flattened, passed through one ReLU fully
        connected layer per entry of `layer_sizes` (scoped 'fc0', 'fc1', ...),
        and then handed to `make_dueling_header`, whose first return value
        becomes the network output.

        Args:
            input_space: Observation space; its `shape` defines the input
                placeholder shape (a leading batch dimension of None is added).
            output_space: Action space; `shape[0]` is passed to the dueling
                header as `output_size`.
            layer_sizes: Number of units for each shared fully connected layer.
            dueling_type: Forwarded unchanged to `make_dueling_header`.
            advantage_layers: Forwarded unchanged to `make_dueling_header`.
            value_layers: Forwarded unchanged to `make_dueling_header`.
            trainable: Passed to every fully connected layer and to the
                dueling header.

        Raises:
            ValueError: If either `input_space` or `output_space` is a `Tuple`.
        """
        # Tuple spaces are rejected up front; this architecture handles
        # a single input tensor and a single output head only.
        if any(isinstance(space, Tuple) for space in (input_space, output_space)):
            raise ValueError('For tuple action and observation spaces '
                             'consider implementing custom network architecture.')

        placeholder_shape = [None] + list(input_space.shape)
        self._input_ph = tf.placeholder('float32', shape=placeholder_shape, name='inputs')

        # NOTE(review): `self.input_ph` is presumably a property exposing
        # `_input_ph`, defined elsewhere in the class -- confirm.
        hidden = layers.flatten(self.input_ph)

        # Shared trunk: each layer is recorded under its scope name.
        end_points = {}
        for index, num_units in enumerate(layer_sizes):
            scope_name = 'fc%d' % index
            hidden = layers.fully_connected(hidden,
                                            num_outputs=num_units,
                                            activation_fn=tf.nn.relu,
                                            trainable=trainable,
                                            scope=scope_name)
            end_points[scope_name] = hidden

        # The header returns the final output plus its own endpoint mapping,
        # which is merged into the trunk's endpoints.
        output, header_end_points = make_dueling_header(
            input_layer=hidden,
            output_size=output_space.shape[0],
            dueling_type=dueling_type,
            advantage_layers=advantage_layers,
            value_layers=value_layers,
            trainable=trainable)
        end_points.update(header_end_points)

        self._output = output
        self.end_points = end_points