def agglomerative_clustering(self, out_path, pd_data, number_of_clusters):
    """Run complete-linkage agglomerative clustering and export the grouping.

    The feature rows obtained from ``pd_data`` are fitted with
    ``AgglomerativeClustering(linkage="complete")``. Repos are then grouped
    by their assigned label, and the fitted model plus the groups are handed
    to the class's export helper.

    Args:
        out_path: Directory portion of the output location; the file name
            ``"agglomerative_noOfClusters<number_of_clusters>"`` is joined
            onto it.
        pd_data: Input passed unchanged to the class's data-fetching helper,
            which yields ``(headers, repos, features)``.
        number_of_clusters: Number of clusters requested from the model and
            the number of repo lists produced.

    Returns:
        None. Results are passed to the export helper.
    """
    # NOTE(review): assumes repos[j] describes the same item as row j of
    # features - confirm against the data-fetching helper.
    _headers, repos, features = self.__fetch_data(pd_data)

    model = AgglomerativeClustering(n_clusters=number_of_clusters, linkage="complete")
    model.fit(features)

    # Group repos by label in a single pass instead of rescanning all labels
    # once per cluster. Labels are expected to be integers in
    # 0..number_of_clusters-1, so each label indexes its own bucket; clusters
    # that receive no repo stay as empty lists, and repos keep their original
    # relative order inside each bucket.
    clusters = [[] for _ in range(number_of_clusters)]
    for repo_index, label in enumerate(model.labels_):
        clusters[label].append(repos[repo_index])

    out_file_path = os.path.join(out_path, "agglomerative_noOfClusters" + str(number_of_clusters))
    self.__export_agglomerative_results(model, clusters, out_file_path)
clustering.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录