clustering.py 文件源码-python代码片段

clustering.py 文件源码

python

阅读 35 收藏 0 点赞 0 评论 0

项目：oss-github-analysis-project 作者: itu-oss-project-team 项目源码文件源码

def minibatchs_k_means_clustering(self, out_path, pd_data, number_of_clusters):
        """Cluster the data with mini-batch k-means and export the grouping.

        Fits a ``MiniBatchKMeans`` model with ``number_of_clusters`` clusters on
        the features extracted from ``pd_data``, groups the repos by the label
        the model assigned to them, and passes the result to the export helper.

        Args:
            out_path: Directory part of the output path; the file name
                ``mb_kmeans_noOfClusters<k>`` is joined onto it.
            pd_data: Input handed unchanged to ``__fetch_data``, which yields
                ``(headers, repos, features)``.
                NOTE(review): presumably a pandas DataFrame -- confirm.
            number_of_clusters: Number of clusters (k) to fit.

        Returns:
            None. The results are handed to ``__export_k_means_results``.
        """
        headers, repos, features = self.__fetch_data(pd_data)

        mb_kmeans = MiniBatchKMeans(n_clusters=number_of_clusters)
        mb_kmeans.fit(features)

        # One bucket per cluster id, filled in a single pass over the labels
        # instead of rescanning every label once per cluster. Position ``row``
        # in ``labels_`` corresponds to ``repos[row]``; rows are visited in
        # ascending order, so each bucket keeps the original repo ordering.
        clusters = [[] for _ in range(number_of_clusters)]
        for row, label in enumerate(mb_kmeans.labels_):
            clusters[label].append(repos[row])

        # The file name deliberately carries no ".csv" suffix; presumably the
        # export helper appends the extension itself -- verify there.
        out_file_path = os.path.join(out_path, "mb_kmeans_noOfClusters" + str(number_of_clusters))
        self.__export_k_means_results(mb_kmeans, headers, clusters, out_file_path)