Clustering.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:SecuML 作者: ANSSI-FR 项目源码 文件源码
def generateClustering(self, assignment_proba, centroids, drop_annotated_instances=False,
                           cluster_labels=None):
        """Populate ``self.clusters`` from the per-instance cluster assignment.

        One ``Cluster`` is created per index in ``range(self.num_clusters)``.
        Every instance id returned by ``self.instances.getIds()`` is then added
        to the cluster given by ``self.assigned_clusters`` at the same position,
        and finally ``finalComputation`` is called on each cluster in index
        order, threading its return value into the next call.

        Args:
            assignment_proba: Accepted for interface compatibility; this method
                does not use it.
            centroids: Indexed by cluster index. When not ``None``, the
                euclidean distance between the instance's features and its
                cluster centroid is passed to ``addInstance``; when ``None``,
                ``None`` is passed as the distance instead.
            drop_annotated_instances: Accepted for interface compatibility;
                this method does not use it.
            cluster_labels: Optional, indexed by cluster index. When not
                ``None``, ``cluster_labels[k]`` is stored as the ``label``
                attribute of cluster ``k``.
        """
        self.clusters = [Cluster() for _ in range(self.num_clusters)]
        if cluster_labels is not None:
            for cluster_idx, cluster in enumerate(self.clusters):
                cluster.label = cluster_labels[cluster_idx]

        instances = self.instances
        for position, instance_id in enumerate(instances.getIds()):
            annotated = instances.isAnnotated(instance_id)
            cluster_idx = self.assigned_clusters[position]
            label = instances.getLabel(instance_id)
            family = instances.getFamily(instance_id)
            distance = None
            if centroids is not None:
                # Reshape to avoid warning from euclidean_distances:
                # it does not take 1D arrays as input.
                centroid = centroids[cluster_idx].reshape(1, -1)
                features = instances.getInstance(instance_id).reshape(1, -1)
                distance = euclidean_distances(centroid, features)[0][0]
            self.clusters[cluster_idx].addInstance(instance_id, distance, label,
                                                   family, annotated)

        # Each finalComputation call receives the value returned by the
        # previous one, starting from 0.
        unknown_cluster_id = 0
        for cluster in self.clusters:
            unknown_cluster_id = cluster.finalComputation(unknown_cluster_id)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号