dqn.py 文件源码-python代码片段

dqn.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

项目：deep_rl_vizdoom 作者: mihahauke 项目源码文件源码

def create_architecture(self, img_input, misc_input, name_scope, reuse=False, **specs):
        """Build the dueling Q-value head on top of the shared input layers.

        The features returned by ``self.get_input_layers`` pass through one
        shared fully connected layer and are then split into two streams: a
        value stream with a single output and an advantage stream with
        ``self.actions_num`` outputs. The streams are merged with the dueling
        aggregation ``Q = V + (A - mean(A))``.

        NOTE: the aggregation previously computed ``A + (mean(A) - V)``;
        weights trained with that formula will produce different Q-values
        under this one.

        Args:
            img_input: image input, forwarded unchanged to ``get_input_layers``.
            misc_input: additional input, forwarded unchanged to
                ``get_input_layers``.
            name_scope: prefix used for the scope name of every layer created
                here (``<name_scope>/fc1``, ``<name_scope>/fc2_value``, ...).
            reuse: forwarded as the ``reuse`` argument of the conv2d and
                fully_connected layers built inside this call.
            **specs: accepted for interface compatibility; not read here.

        Returns:
            The op computing Q-values, one column per action.
        """
        with arg_scope([layers.conv2d, layers.fully_connected], reuse=reuse):
            fc_input = self.get_input_layers(img_input, misc_input, name_scope)

            # Shared hidden layer feeding both streams.
            fc1 = layers.fully_connected(fc_input, num_outputs=self.fc_units_num, scope=name_scope + "/fc1")

            # Value stream: one scalar per sample.
            fc2_value = layers.fully_connected(fc1, num_outputs=256, scope=name_scope + "/fc2_value")
            value = layers.linear(fc2_value, num_outputs=1, scope=name_scope + "/fc3_value")

            # Advantage stream: one output per action.
            fc2_advantage = layers.fully_connected(fc1, num_outputs=256, scope=name_scope + "/fc2_advantage")
            advantage = layers.linear(fc2_advantage, num_outputs=self.actions_num, scope=name_scope + "/fc3_advantage")

            # Reshape the per-sample mean to a column so it broadcasts over the action axis.
            mean_advantage = tf.reshape(tf.reduce_mean(advantage, axis=1), (-1, 1))
            # Centre the advantages before adding the state value, so that the
            # mean of Q over actions equals the value stream output.
            q_op = value + (advantage - mean_advantage)
            return q_op