def clusterSimilarityWithSklearnAPC(data_file,damping=0.9,max_iter=200,convergence_iter=15,preference='min'):
    """
    Compare Sparse Affinity Propagation (SAP) result with SKlearn Affinity Propagation (AP) Clustering result.

    Both algorithms are run on the same similarity matrix with the same
    damping, max_iter, convergence_iter and preference, and the per-sample
    exemplar assignments are then compared.

    Please note that the convergence condition for Sklearn AP is "no change in
    the number of estimated clusters", while for SAP the condition is "no
    change in the cluster assignment". So SAP may take more iterations and
    there may be slight differences in the final cluster assignment
    (exemplars for each sample).

    Parameters
    ----------
    data_file : passed unchanged to ``loadMatrix``; the loaded object must
        provide ``todense()``.
    damping : damping factor forwarded to both algorithms.
    max_iter : maximum number of iterations forwarded to both algorithms.
    convergence_iter : convergence window forwarded to both algorithms.
    preference : 'min' or 'median' selects the minimum / median of the dense
        similarity matrix; any other value is forwarded unchanged.

    Returns
    -------
    The value returned by ``sparseAP_cy.arrSamePercent`` for the sklearn
    exemplars versus the SAP exemplars.
    """
    # loading data
    simi_mat = loadMatrix(data_file)
    simi_mat_dense = simi_mat.todense()

    # Resolve the symbolic preference names against the dense similarities.
    if preference == 'min':
        preference = np.min(simi_mat_dense)
    elif preference == 'median':
        preference = np.median(simi_mat_dense)

    print('{0}, start SKlearn Affinity Propagation'.format(datetime.now()))
    # max_iter / convergence_iter must be forwarded here as well, otherwise
    # sklearn silently runs with its own defaults and the comparison with SAP
    # is made under different stopping parameters.
    af = AffinityPropagation(
        damping=damping,
        max_iter=max_iter,
        convergence_iter=convergence_iter,
        preference=preference,
        affinity='precomputed',
        verbose=True,
    )
    # todense() may yield an np.matrix, which recent scikit-learn releases
    # reject; np.asarray hands sklearn a plain ndarray view without copying.
    af.fit(np.asarray(simi_mat_dense))
    cluster_centers_indices, labels = af.cluster_centers_indices_, af.labels_
    # Map every sample's cluster label to the index of that cluster's exemplar.
    sk_exemplars = np.asarray(cluster_centers_indices)[labels]

    print('{0}, start Fast Sparse Affinity Propagation Cluster'.format(datetime.now()))
    sap = SAP(
        preference=preference,
        convergence_iter=convergence_iter,
        max_iter=max_iter,
        damping=damping,
        verboseIter=100,
    )
    sap_exemplars = sap.fit_predict(simi_mat_dense)

    # Calculate similarity between sk_exemplars and sap_exemplars
    exemplars_similarity = sparseAP_cy.arrSamePercent(np.array(sk_exemplars), np.array(sap_exemplars))
    return exemplars_similarity
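# 评论列表  (web-page residue: "comment list" -- not part of the program)
# 文章目录  (web-page residue: "table of contents" -- not part of the program)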