def create_architecture(self, img_input, misc_input, name_scope, reuse=False, **specs):
    """Build the dueling Q-value head on top of the shared input layers.

    The features returned by ``self.get_input_layers`` feed one shared
    fully connected layer (``self.fc_units_num`` units), which then splits
    into two streams of 256 units each:

    * a value stream ending in a single linear output, and
    * an advantage stream ending in ``self.actions_num`` linear outputs.

    The streams are merged with the dueling aggregation
    ``Q = V + (A - mean(A))``, where the mean is taken over axis 1 of the
    advantage output.

    Args:
        img_input: Image input, forwarded unchanged to ``get_input_layers``.
        misc_input: Auxiliary input, forwarded unchanged to
            ``get_input_layers``.
        name_scope: Prefix for every layer scope created here
            (``<name_scope>/fc1``, ``/fc2_value``, ``/fc3_value``,
            ``/fc2_advantage``, ``/fc3_advantage``).
        reuse: Passed as the ``reuse`` argument to ``layers.conv2d`` and
            ``layers.fully_connected`` through ``arg_scope``.
        **specs: Accepted for interface compatibility; not used here.

    Returns:
        The Q-value tensor with one entry per action.

    NOTE(review): the value and mean-advantage terms previously entered the
    sum with inverted signs; weights trained against that form will not
    produce the same outputs with this aggregation.
    """
    # Only conv2d / fully_connected receive `reuse`; an arg_scope over an
    # empty op list has no effect, so none is opened here.
    with arg_scope([layers.conv2d, layers.fully_connected], reuse=reuse):
        fc_input = self.get_input_layers(img_input, misc_input, name_scope)
        fc1 = layers.fully_connected(
            fc_input, num_outputs=self.fc_units_num, scope=name_scope + "/fc1")

        # Value stream: a single scalar estimate per sample.
        fc2_value = layers.fully_connected(
            fc1, num_outputs=256, scope=name_scope + "/fc2_value")
        value = layers.linear(
            fc2_value, num_outputs=1, scope=name_scope + "/fc3_value")

        # Advantage stream: one output per action.
        fc2_advantage = layers.fully_connected(
            fc1, num_outputs=256, scope=name_scope + "/fc2_advantage")
        advantage = layers.linear(
            fc2_advantage, num_outputs=self.actions_num,
            scope=name_scope + "/fc3_advantage")

        # Reshape the per-sample mean to a column so it broadcasts across the
        # action axis (assumes `advantage` is (batch, actions_num)).
        mean_advantage = tf.reshape(tf.reduce_mean(advantage, axis=1), (-1, 1))

        # Dueling aggregation: centre the advantages, then add the value.
        q_op = value + (advantage - mean_advantage)
        return q_op
# (Residue from the source web page, not code: "comment list" / "article table of contents".)