def k_means_clustering(self, out_path, pd_data, number_of_clusters):
    """Run k-means on the data in ``pd_data`` and export the clusters.

    The repos are grouped by the cluster label k-means assigns to them,
    and the fitted model, headers and grouped repos are handed to
    ``self.__export_k_means_results``. Nothing is returned.

    Args:
        out_path: Directory part of the result path; the file name
            ``kmeans_noOfClusters<k>`` is joined onto it.
        pd_data: Input passed unchanged to ``self.__fetch_data``
            (presumably a pandas object, going by the name -- confirm
            against callers).
        number_of_clusters: Number of clusters (k) to form.
    """
    headers, repos, features = self.__fetch_data(pd_data)

    # Fixed random_state and n_init are kept exactly as before so results
    # stay identical between runs and with earlier exports.
    kmeans = KMeans(
        n_clusters=number_of_clusters, random_state=0, n_init=200
    ).fit(features)

    # One bucket per cluster number; a single pass over the labels fills
    # them, instead of rescanning every label once per cluster.
    clusters = [[] for _ in range(number_of_clusters)]
    for repo_index, label in enumerate(kmeans.labels_):
        # labels_ holds one cluster number per repo, in repo order, so
        # each bucket keeps its repos in their original relative order.
        clusters[label].append(repos[repo_index])

    # No ".csv" suffix here on purpose (NOTE(review): presumably the export
    # helper appends the extension itself -- confirm).
    out_file_path = os.path.join(
        out_path, "kmeans_noOfClusters" + str(number_of_clusters)
    )
    self.__export_k_means_results(kmeans, headers, clusters, out_file_path)
clustering.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录