clustering.py 文件源码-python代码片段

clustering.py 文件源码

python

阅读 26 收藏 0 点赞 0 评论 0

项目：oss-github-analysis-project 作者: itu-oss-project-team 项目源码文件源码

def k_means_clustering(self, out_path, pd_data, number_of_clusters):
        """Run k-means on the repository features and export the clusters.

        The data is unpacked via ``self.__fetch_data``, a ``KMeans`` model is
        fitted on the features, the repositories are grouped by their assigned
        label, and everything is handed to ``self.__export_k_means_results``.

        Args:
            out_path: Directory in which the result file is placed.
            pd_data: Input passed unchanged to ``self.__fetch_data``
                (presumably a pandas DataFrame -- confirm against the caller).
            number_of_clusters: Number of clusters (k) to fit.

        Returns:
            None. The results are handed to the export helper.
        """
        headers, repos, features = self.__fetch_data(pd_data)

        # random_state=0 keeps the run reproducible; n_init=200 repeats the
        # algorithm with different centroid seeds and keeps the best result.
        kmeans = KMeans(n_clusters=number_of_clusters, random_state=0, n_init=200).fit(features)

        # Group repos by their label in a single pass: clusters[i] holds the
        # repos assigned to cluster i, in their original order. Labels and
        # repos are aligned by position, one label per repo.
        clusters = [[] for _ in range(number_of_clusters)]
        for position, label in enumerate(kmeans.labels_):
            clusters[label].append(repos[position])

        # The path deliberately carries no ".csv" suffix; presumably the export
        # helper appends its own extension(s) -- confirm in
        # __export_k_means_results.
        out_file_path = os.path.join(out_path, "kmeans_noOfClusters" + str(number_of_clusters))
        self.__export_k_means_results(kmeans, headers, clusters, out_file_path)