def k_means_clustering(instance_array, n_clusters=9, sin_cos=1, number_of_starts=30, seed=None, use_scikit=1, **kwargs):
    """Cluster the rows of ``instance_array`` with k-means.

    Parameters
    ----------
    instance_array : 2-D numpy array
        One instance per row. The progress messages refer to the values as
        phases; when ``sin_cos`` is truthy each column is replaced by a
        (cos, sin) pair of columns before clustering.
    n_clusters : int
        Number of clusters requested.
    sin_cos : int or bool
        Truthy -> cluster on interleaved cos/sin columns (cos in the even
        columns, sin in the odd columns). Falsy -> cluster on the raw values.
        The same truth value selects the key of the returned dictionary, so
        the key always matches the data that was actually clustered.
    number_of_starts : int
        Passed as ``n_init`` to scikit-learn's ``KMeans`` or as ``iter`` to
        ``scipy.cluster.vq.kmeans``.
    seed : optional
        Passed as ``random_state`` to scikit-learn. It is not used by the
        scipy code path.
    use_scikit : int or bool
        Truthy -> use scikit-learn's ``KMeans``; falsy -> use scipy's ``vq``.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    cluster_assignments : array
        Cluster index assigned to each row of the input.
    cluster_details : dict
        ``{'k_means_centroids_sc': centroids}`` when the cos/sin
        representation was used, otherwise ``{'k_means_centroids': centroids}``.
    """
    # Evaluate the flag once so the transform and the result key cannot
    # disagree for truthy values other than exactly 1.
    use_sin_cos = bool(sin_cos)

    print('starting kmeans algorithm, k=%d, retries : %d, sin_cos = %d' % (n_clusters, number_of_starts, sin_cos))
    if use_sin_cos:
        print(' using sine and cosine of the phases')
        n_rows, n_cols = instance_array.shape[0], instance_array.shape[1]
        input_array = np.zeros((n_rows, n_cols * 2), dtype=float)
        input_array[:, ::2] = np.cos(instance_array)
        input_array[:, 1::2] = np.sin(instance_array)
    else:
        print(' using raw phases')
        input_array = instance_array

    if use_scikit:
        print('using scikit learn')
        # Imported here so the scipy path works without scikit-learn installed.
        from sklearn.cluster import KMeans
        # n_jobs is deliberately not passed: it was deprecated in
        # scikit-learn 0.23 and removed in 1.0, where passing it raises
        # TypeError. The previous value (1) matched the old default.
        estimator = KMeans(init='k-means++', n_clusters=n_clusters, n_init=number_of_starts, random_state=seed)
        cluster_assignments = estimator.fit_predict(input_array)
        code_book = estimator.cluster_centers_
    else:
        print('using vq from scipy')
        code_book, _ = vq.kmeans(input_array, n_clusters, iter=number_of_starts)
        cluster_assignments, _ = vq.vq(input_array, code_book)

    centroid_key = 'k_means_centroids_sc' if use_sin_cos else 'k_means_centroids'
    cluster_details = {centroid_key: code_book}
    return cluster_assignments, cluster_details
##################################################################################
#############################k-means periodic algorithm##############################
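# NOTE: stray web-page navigation text ("comment list", "table of contents") was pasted here; it is not part of the program.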