clustering.py 文件源码-python代码片段

clustering.py 文件源码

python

阅读 37 收藏 0 点赞 0 评论 0

项目：oss-github-analysis-project 作者: itu-oss-project-team 项目源码文件源码

def agglomerative_clustering(self, out_path, pd_data, number_of_clusters):
        """Run complete-linkage agglomerative clustering and export the clusters.

        The data returned by ``self.__fetch_data(pd_data)`` is unpacked into
        three values; the second (``repos``) and third (``features``) are used
        here.  ``features`` is fitted with scikit-learn's
        ``AgglomerativeClustering`` and every entry of ``repos`` is placed in
        the bucket of the label assigned to the sample at the same position.

        Args:
            out_path: Directory that is joined with the generated output name
                ``"agglomerative_noOfClusters<k>"`` (no extension is added here).
            pd_data: Input handed unchanged to ``self.__fetch_data``
                (presumably a pandas object -- TODO confirm against caller).
            number_of_clusters: Number of clusters ``k`` requested from the
                model; also the number of buckets built.

        Returns:
            None.  The fitted model, the list of ``k`` repo lists and the
            output path are passed to ``self.__export_agglomerative_results``.
        """
        # The first element (headers) is not needed by this method.
        _, repos, features = self.__fetch_data(pd_data)

        model = AgglomerativeClustering(n_clusters=number_of_clusters, linkage="complete")
        model.fit(features)

        # Bucket repos by label in a single pass instead of rescanning all
        # labels once per cluster.  Bucket i holds the repos labelled i, in
        # their original order; clusters that received no repo stay empty.
        clusters = [[] for _ in range(number_of_clusters)]
        for position, label in enumerate(model.labels_):
            # Ignore labels outside 0..k-1 (and never let a negative label
            # wrap around to the end of the list).
            if 0 <= label < number_of_clusters:
                clusters[label].append(repos[position])

        out_file_path = os.path.join(out_path, "agglomerative_noOfClusters" + str(number_of_clusters))
        self.__export_agglomerative_results(model, clusters, out_file_path)