def minibatchs_k_means_clustering(self, out_path, pd_data, number_of_clusters):
    """Cluster the repositories with mini-batch k-means and export the result.

    The data is unpacked by ``self.__fetch_data`` into headers, repos and a
    feature collection; a ``MiniBatchKMeans`` model is fitted on the features,
    the repos are grouped by their assigned label, and everything is handed
    to ``self.__export_k_means_results``.

    Args:
        out_path: Directory in which the result file is placed.
        pd_data: Input data forwarded unchanged to ``self.__fetch_data``
            (presumably a pandas object, judging by the name -- confirm
            against the helper).
        number_of_clusters: Number of clusters (k) to fit; also appended to
            the output file name.
    """
    headers, repos, features = self.__fetch_data(pd_data)

    mb_kmeans = MiniBatchKMeans(n_clusters=number_of_clusters)
    mb_kmeans.fit(features)

    # One bucket per cluster index, so clusters that received no repo still
    # show up as an empty list at their own position.
    clusters = [[] for _ in range(number_of_clusters)]

    # labels_[row] is the cluster index assigned to the row-th sample.
    # A single pass fills every bucket in row order, instead of rescanning
    # all labels once per cluster. Assumes labels lie in
    # [0, number_of_clusters), as scikit-learn documents for labels_.
    for row, label in enumerate(mb_kmeans.labels_):
        clusters[label].append(repos[row])

    # No ".csv" suffix here on purpose (per the original author's note);
    # presumably the export helper adds the extension itself -- confirm.
    out_file_path = os.path.join(
        out_path, "mb_kmeans_noOfClusters" + str(number_of_clusters)
    )
    self.__export_k_means_results(mb_kmeans, headers, clusters, out_file_path)
clustering.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录