def initialisation(self):
    '''Set up the parameter arrays and initial cluster memberships, then run one maximisation step.

    Creates ``self.mu_list`` and ``self.kappa_list`` as arrays of ones with
    shape (n_clusters, n_dimensions), resets ``self.LL_list`` to an empty
    list, and builds the membership matrix ``self.zij`` with one row per row
    of ``self.instance_array`` and one column per cluster.

    How ``self.zij`` is filled depends on ``self.start``:

    * ``'k_means'`` -- hard (0/1) memberships taken from the labels returned
      by ``k_means_clustering``.
    * ``'EM_GMM'``  -- hard (0/1) memberships taken from the labels returned
      by ``EM_GMM_clustering``.
    * anything else -- every instance gets a random membership for each
      cluster, with each row normalised to sum to 1.

    Finally ``self._EM_VMM_maximisation_step()`` is called.

    SH: 7June2013
    '''
    self.mu_list = np.ones((self.n_clusters, self.n_dimensions), dtype=float)
    self.kappa_list = np.ones((self.n_clusters, self.n_dimensions), dtype=float)
    self.LL_list = []
    self.zij = np.zeros((self.instance_array.shape[0], self.n_clusters), dtype=float)
    # NOTE: print is written with a single parenthesised argument so the same
    # text is emitted under both Python 2 and Python 3.
    if self.start == 'k_means':
        print('Initialising clusters using a fast k_means run')
        self.cluster_assignments, self.cluster_details = k_means_clustering(
            self.instance_array, n_clusters=self.n_clusters, sin_cos=1,
            number_of_starts=3, seed=self.seed)
        # One-hot encode the labels; labels are used directly as column
        # indices (presumably integers in range(n_clusters) -- confirm
        # against k_means_clustering).
        for i in set(self.cluster_assignments):
            self.zij[self.cluster_assignments == i, i] = 1
        print('finished initialising')
    elif self.start == 'EM_GMM':
        self.cluster_assignments, self.cluster_details = EM_GMM_clustering(
            self.instance_array, n_clusters=self.n_clusters, sin_cos=1,
            number_of_starts=1)
        # Fixed: this branch previously referenced the bare name
        # `cluster_assignments`, which is undefined here and raised NameError.
        for i in set(self.cluster_assignments):
            self.zij[self.cluster_assignments == i, i] = 1
    else:
        print('going with random option')
        # need to get this to work better.....
        # NOTE(review): self.seed is not applied to this random draw.
        self.zij = np.random.random(self.zij.shape)
        # and normalise so each row adds up to 1....
        self.zij = self.zij / ((np.sum(self.zij, axis=1))[:, np.newaxis])
    self._EM_VMM_maximisation_step()
# (stray web-page navigation text, not part of the program: "Comment list" / "Article table of contents")