def EM_VMM_clustering(instance_array, n_clusters = 9, n_iterations = 20, n_cpus=1, start='random'):
#This is for the new method...
instance_array_complex = np.exp(1j*instance_array)
kappa_lookup = np.linspace(0,100,10000)
bessel_lookup_table = [spec.iv(1,kappa_lookup)/spec.iv(0,kappa_lookup), kappa_lookup]
print '...'
n_dimensions = instance_array.shape[1]
iteration = 1
#First assignment step
mu_list = np.ones((n_clusters,n_dimensions),dtype=float)
kappa_list = np.ones((n_clusters,n_dimensions),dtype=float)
LL_list = []
if start=='k_means':
print 'Initialising clusters using a fast k_means run'
cluster_assignments, cluster_details = k_means_clustering(instance_array, n_clusters=n_clusters, sin_cos = 1, number_of_starts = 1)
mu_list, kappa_list, convergence_mu, convergence_kappa = _EM_VMM_maximisation_step_hard(mu_list, kappa_list, instance_array, cluster_assignments, instance_array_complex = instance_array_complex, bessel_lookup_table= bessel_lookup_table, n_cpus=n_cpus)
elif start=='EM_GMM':
print 'Initialising clusters using a EM_GMM run'
cluster_assignments, cluster_details = EM_GMM_clustering(instance_array, n_clusters=n_clusters, sin_cos = 0, number_of_starts = 1)
mu_list, kappa_list, convergence_mu, convergence_kappa = _EM_VMM_maximisation_step_hard(mu_list, kappa_list, instance_array, cluster_assignments, instance_array_complex = instance_array_complex, bessel_lookup_table = bessel_lookup_table, n_cpus=n_cpus)
else:
print 'Initialising clusters using random start points'
mu_list = np.random.rand(n_clusters,n_dimensions)*2.*np.pi - np.pi
kappa_list = np.random.rand(n_clusters,n_dimensions)*20
cluster_assignments, L = _EM_VMM_expectation_step_hard(mu_list, kappa_list,instance_array)
while np.min([np.sum(cluster_assignments==i) for i in range(len(mu_list))])<20:#(instance_array.shape[0]/n_clusters/4):
print 'recalculating initial points'
mu_list = np.random.rand(n_clusters,n_dimensions)*2.*np.pi - np.pi
kappa_list = np.random.rand(n_clusters,n_dimensions)*20
cluster_assignments = _EM_VMM_expectation_step_hard(mu_list, kappa_list,instance_array)
print cluster_assignments
convergence_record = []
converged = 0;
while (iteration<=n_iterations) and converged!=1:
start_time = time.time()
cluster_assignments, L = _EM_VMM_expectation_step_hard(mu_list, kappa_list,instance_array)
LL_list.append(L)
mu_list, kappa_list, convergence_mu, convergence_kappa = _EM_VMM_maximisation_step_hard(mu_list, kappa_list, instance_array, cluster_assignments, instance_array_complex = instance_array_complex, bessel_lookup_table = bessel_lookup_table, n_cpus=n_cpus)
print 'Time for iteration %d :%.2f, mu_convergence:%.3f, kappa_convergence:%.3f, LL: %.8e'%(iteration,time.time() - start_time,convergence_mu, convergence_kappa,L)
convergence_record.append([iteration, convergence_mu, convergence_kappa])
if convergence_mu<0.01 and convergence_kappa<0.01:
converged = 1
print 'Convergence criteria met!!'
iteration+=1
print 'AIC : %.2f'%(2*(mu_list.shape[0]*mu_list.shape[1])-2.*LL_list[-1])
cluster_details = {'EM_VMM_means':mu_list, 'EM_VMM_kappas':kappa_list, 'EM_VMM_LL':LL_list}
return cluster_assignments, cluster_details
评论列表
文章目录