def comparative_exp():
    """
    Run a series of clustering experiments for different parameter settings.

    The data set path is read from ``sys.argv[1]`` and the stemming
    dictionary path from ``sys.argv[2]``. For each ontological category
    ('thing', 'body') a similarity matrix is built over the items of that
    category, and every valid clustering configuration is run on it. For
    each run the parameter list, the set of distinct cluster labels, the
    evaluation result and a confusion matrix are printed.

    Returns:
        None. All results are written to standard output.

    Raises:
        IndexError: if fewer than two command-line arguments were given.
    """
    data_path = sys.argv[1]  # path to data set
    stem_dict_path = sys.argv[2]  # path to stemming dictionary
    parameters = ['SPLIT', 'noUF']
    d = data(data_path, stem_dict_path, parameters)

    # Enumerate every (algorithm, method, k) triple and keep only the
    # combinations that are meaningful:
    #   * 'hierarchical' takes a method and no k,
    #   * 'kmeans' takes a k and no method,
    #   * 'ward' and 'ap' take neither.
    clustering_algos = [
        (a, m, k)
        for a in ['hierarchical', 'ward', 'ap', 'kmeans']
        for m in [None, 'complete', 'average', 'single']
        for k in [None, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        if (m is not None and k is None and a == 'hierarchical')
        or (m is None and k is not None and a == 'kmeans')
        or (m is None and k is None and a in ['ward', 'ap'])
    ]

    for onto_cat in ['thing', 'body']:
        parameters_i = parameters + ['onto:%s' % onto_cat]
        # Sorted, de-duplicated first-axis indices of the items whose
        # ontological category equals onto_cat.
        # NOTE(review): assumes d.ontological is a NumPy array -- confirm.
        oix = sorted(set(np.where(d.ontological == onto_cat)[0]))
        similarity_matrix = get_similarity_matrix(
            d, parameters_i, oix, association='associated')

        for a, m, k in clustering_algos:
            # Build a fresh parameter list per run so that the optional
            # 'method:'/'k:' entries do not leak into the next configuration.
            parameters_j = parameters_i + ['algo:%s' % a]
            if m is not None:
                parameters_j.append('method:%s' % m)
            if k is not None:
                parameters_j.append('k:%r' % k)
            print(parameters_j)

            cluster_assignments = get_cluster_assignments(
                similarity_matrix, parameters_j)
            print(set(cluster_assignments))
            print(evaluate_clustering(cluster_assignments, d.annotation[oix]))
            print_confusion_matrix(cluster_assignments, d.annotation[oix])
# Web-page navigation residue ("Comment list" / "Table of contents"); not part of the program.