def generateClustering(self, assignment_proba, centroids, drop_annotated_instances=False,
                       cluster_labels=None):
    """Rebuild ``self.clusters`` from the per-instance cluster assignments.

    One fresh ``Cluster`` is created per cluster index in
    ``range(self.num_clusters)``.  Every instance id returned by
    ``self.instances.getIds()`` is then added to the cluster given by
    ``self.assigned_clusters`` at the same position, and finally
    ``finalComputation`` is called on every cluster in index order.

    Args:
        assignment_proba: Accepted for interface compatibility; this method
            does not use it when building the clusters.
        centroids: Optional collection indexed by cluster index.  When
            given, the distance between each instance and the centroid of
            its assigned cluster is computed with ``euclidean_distances``
            and passed to ``addInstance``; when ``None`` the distance
            passed is ``None``.
        drop_annotated_instances: Accepted for interface compatibility;
            this method does not read it.
        cluster_labels: Optional collection indexed by cluster index; when
            given, ``cluster_labels[k]`` is stored as the ``label``
            attribute of cluster ``k``.

    Returns:
        None.  The result is stored in ``self.clusters``.
    """
    self.clusters = [Cluster() for _ in range(self.num_clusters)]
    if cluster_labels is not None:
        # Index explicitly (rather than zip) so that any indexable
        # container keyed by cluster index keeps working.
        for cluster_index, cluster in enumerate(self.clusters):
            cluster.label = cluster_labels[cluster_index]

    for i, instance_id in enumerate(self.instances.getIds()):
        annotated = self.instances.isAnnotated(instance_id)
        # assigned_clusters is positional: entry i belongs to the i-th id.
        c = self.assigned_clusters[i]
        label = self.instances.getLabel(instance_id)
        family = self.instances.getFamily(instance_id)
        if centroids is not None:
            # Reshape to avoid warning from euclidean_distances:
            # it does not take a 1D array as input.
            centroid = centroids[c].reshape(1, -1)
            features = self.instances.getInstance(instance_id).reshape(1, -1)
            distance = euclidean_distances(centroid, features)[0][0]
        else:
            distance = None
        self.clusters[c].addInstance(instance_id, distance, label, family, annotated)

    # Thread a running counter through the clusters: each call receives the
    # current value and returns the value handed to the next cluster.
    unknown_cluster_id = 0
    for cluster in self.clusters:
        unknown_cluster_id = cluster.finalComputation(unknown_cluster_id)
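# Web-page navigation residue from the scraped source (not part of the code):
# "Comment list" / "Table of contents"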