def get_cluster_assignments(sim_matrix, parameters):
    """
    Cluster the exemplars described by sim_matrix and return their labels.

    sim_matrix: list of list of float (or np.array) -- similarity matrix
        between exemplars; it is handed unchanged to the selected
        clustering routine.
    parameters: list of str in the format ["method:method_name",
        "algo:algo_name", "k:num_clusters", "damping:damping"]
        where order doesn't matter and every entry is optional.
        Entries are recognised by their prefix:
          algo    -- one of 'hierarchical', 'kmeans', 'ap', 'ward'
                     (default 'ap')
          method  -- linkage method, e.g. 'single', 'complete', 'average';
                     only used when algo is 'hierarchical' (default 'single')
          k       -- int number of clusters; only used when algo is 'kmeans'
                     (default 8)
          damping -- float damping factor; only used when algo is 'ap'
                     (default 0.5)

    Returns an array of integers. The integer at each index corresponds
    to the cluster number of the exemplar at the same index in sim_matrix.

    Raises ValueError if the algorithm name is not one of the four listed
    above, or if k / damping cannot be converted to a number.
    """
    def option(prefix, default, convert=str):
        # First entry whose text starts with `prefix` wins; the value is the
        # text between the first and second ':' of that entry.
        for entry in parameters:
            if entry.startswith(prefix):
                return convert(entry.split(':')[1])
        return default

    algorithm = option('algo', 'ap')
    method = option('method', 'single')
    num_clusters = option('k', 8, int)
    # The original test compared a 4-character slice with 'damping', so a
    # supplied damping value could never be picked up; it is now parsed as a
    # float and forwarded to AffinityPropagation below.
    damping = option('damping', 0.5, float)

    if algorithm in ('hierarchical', 'ward'):
        # NOTE(review): scipy treats a 2-D input to linkage()/ward() as
        # observation vectors, not as a precomputed distance matrix --
        # confirm that this is what is intended for sim_matrix.
        if algorithm == 'ward':
            linkage_matrix = hierarchy.ward(sim_matrix)
        else:
            linkage_matrix = hierarchy.linkage(sim_matrix, method)
        # get_k is defined elsewhere in this file; presumably it picks the
        # number of flat clusters with 20 as a limit -- TODO confirm.
        k = get_k(linkage_matrix, 20)
        # fcluster numbers clusters from 1; shift so labels start at 0 like
        # the labels produced by the scikit-learn estimators below.
        return hierarchy.fcluster(linkage_matrix, k, criterion='maxclust') - 1
    if algorithm == 'kmeans':
        return KMeans(n_clusters=num_clusters).fit_predict(sim_matrix)
    if algorithm == 'ap':
        return AffinityPropagation(damping=damping).fit_predict(sim_matrix)

    # Previously an unknown name fell through every branch and crashed with
    # an UnboundLocalError on the return statement.
    raise ValueError(
        "unknown clustering algorithm %r; expected one of "
        "'hierarchical', 'kmeans', 'ap', 'ward'" % (algorithm,)
    )
评论列表
文章目录