def _EM_GMM_expectation_step(self):
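    """E-step for a diagonal-covariance Gaussian mixture: fill self.probs with
    the weighted component densities of every instance, then normalise each
    row into the responsibilities self.zij."""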
    # One row per instance, one column of weighted densities per cluster
    self.probs = np.zeros((self.instance_array.shape[0], self.n_clusters), dtype=float)
    # instance_array_c and instance_array_s hold the precomputed cos/sin of the
    # data; the (commented-out) von Mises variant below uses them to speed up
    # cos(instance_array - mu) via the identity
    # cos(a - b) = cos(a)*cos(b) + sin(a)*sin(b),
    # removing the need to recalculate cos(a) and sin(a) on every iteration.
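    # A minimal sketch of that precomputation (an assumption: done once before
    # the EM loop, and only relevant if the von Mises variant is re-enabled):
    #   self.instance_array_c = np.cos(self.instance_array)
    #   self.instance_array_s = np.sin(self.instance_array)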
    for cluster_ident, (mu_tmp, std_tmp, p_hat) in enumerate(zip(self.mu_list, self.std_list, self.pi_hat)):
        # von Mises normalisation (kept for reference; kappa_tmp is the
        # concentration parameter of the commented-out variant below):
        # norm_fac_exp = -self.n_dimensions*np.log(2.*np.pi) - np.sum(np.log(spec.iv(0, kappa_tmp)))
        # Log of the Gaussian normalisation constant: -(d/2)*log(2*pi) - sum(log(sigma))
        norm_fac_exp = -0.5*self.n_dimensions*np.log(2.*np.pi) - np.sum(np.log(std_tmp))
        # von Mises variant of the exponent (kept for reference):
        # pt1 = kappa_tmp * (self.instance_array_c*np.cos(mu_tmp) + self.instance_array_s*np.sin(mu_tmp))
        # Gaussian log-density exponent per dimension: -(x - mu)^2 / (2*sigma^2)
        pt1 = -(self.instance_array - mu_tmp)**2/(2.*(std_tmp**2))
        # Weighted component density for this cluster, for every instance
        self.probs[:, cluster_ident] = p_hat * np.exp(np.sum(pt1, axis=1) + norm_fac_exp)
    # Normalise across clusters: zij[i, k] = p(cluster k | instance i)
    prob_sum = np.sum(self.probs, axis=1)[:, np.newaxis]
    self.zij = self.probs/prob_sum
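    # Note (a sketch, not part of the original): if the exponentials underflow
    # for every cluster, prob_sum is zero and zij becomes NaN. A log-space
    # formulation is more robust, e.g. with scipy.special.logsumexp:
    #   log_p = np.log(self.pi_hat) + log_densities  # log_densities: hypothetical (n, k) array
    #   self.zij = np.exp(log_p - logsumexp(log_p, axis=1, keepdims=True))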
    # Calculating the log-likelihood is quite expensive and not really necessary
    # unless comparing different techniques and/or checking for convergence.
    # The masking/clipping below prevents problems with taking the log of a
    # very small number:
    # L = np.sum(self.zij[self.probs > 1.e-20]*np.log(self.probs[self.probs > 1.e-20]))
    # L = np.sum(self.zij*np.log(np.clip(self.probs, 1.e-10, 1)))
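# A minimal runnable sketch (an addition, not in the original; the helper name
# _EM_GMM_log_likelihood is hypothetical): the observed-data log-likelihood can
# be read directly off the per-instance mixture sums from the E-step above,
# avoiding the zij*log(probs) form and its log-of-zero issues.
def _EM_GMM_log_likelihood(self):
    # self.probs holds pi_k * N(x_i | mu_k, sigma_k); summing over clusters
    # gives the mixture density p(x_i) for every instance.
    prob_sum = np.sum(self.probs, axis=1)
    # Clipping guards against log(0) for instances far from every cluster.
    return np.sum(np.log(np.clip(prob_sum, 1.e-300, None)))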
#############################################################################
#####################Plotting functions#####################################