#Assumes numpy (as np) and scipy.special (as spec) are imported at module level
def _EM_VMM_GMM_expectation_step(self):
    #Reset the responsibility array (could probably be removed by updating zij in place)
    self.probs = np.zeros_like(self.zij)
    #instance_array_c and instance_array_s are cached copies of cos(instance_array) and
    #sin(instance_array), used to speed up evaluating cos(instance_array - mu) via the
    #trig identity cos(a-b) = cos(a)cos(b) + sin(a)sin(b); this removes the need to
    #constantly recalculate cos and sin of the data (see the sketch below)
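    #For reference, a minimal sketch of the caching this relies on (assuming
    #self.instance_array holds the phase data; computed once, outside the EM loop):
    #    self.instance_array_c = np.cos(self.instance_array)
    #    self.instance_array_s = np.sin(self.instance_array)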
    for cluster_ident, (mu_tmp, kappa_tmp, mean_tmp, std_tmp, p_hat) in enumerate(
            zip(self.mu_list, self.kappa_list, self.mean_list, self.std_list, self.pi_hat)):
        #log normalisation constant of the von Mises product: -D*log(2*pi) - sum(log(I0(kappa)))
        norm_fac_exp_VMM = -self.n_dimensions*np.log(2.*np.pi) - np.sum(np.log(spec.iv(0,kappa_tmp)))
        #log normalisation constant of the Gaussian product: -(D/2)*log(2*pi) - sum(log(sigma))
        norm_fac_exp_GMM = -self.n_dimensions_amps/2.*np.log(2.*np.pi) - np.sum(np.log(std_tmp))
        #Gaussian log-density exponent, per dimension
        pt1_GMM = -(self.instance_array_amps - mean_tmp)**2/(2.*(std_tmp**2))
        #As noted above, instance_array_c and instance_array_s come from the trig identity;
        #mu_tmp has far fewer elements than those precalculated arrays, so only cos(mu_tmp)
        #and sin(mu_tmp) need to be evaluated here, which speeds this step up significantly
        pt1_VMM = kappa_tmp * (self.instance_array_c*np.cos(mu_tmp) + self.instance_array_s*np.sin(mu_tmp))
        #Mixture-weighted likelihood of each instance under this cluster:
        #pi_k * exp(log VMM density + log GMM density)
        self.probs[:,cluster_ident] = p_hat * np.exp(np.sum(pt1_VMM,axis=1) + norm_fac_exp_VMM +
                                                     np.sum(pt1_GMM,axis=1) + norm_fac_exp_GMM)
    #Normalise so that each row of responsibilities sums to 1
    prob_sum = np.sum(self.probs, axis=1)[:,np.newaxis]
    self.zij = self.probs/prob_sum
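    #If underflow ever produces an all-zero row, the division above yields NaNs; a
    #minimal guard (an assumption, not part of the original implementation) would be:
    #    self.zij = self.probs/np.clip(prob_sum, 1.e-300, None)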
    #Calculating the log-likelihood is quite expensive and not really necessary unless
    #comparing different techniques and/or checking for convergence. The masking and
    #clipping below prevent problems with taking the log of a very small number:
    #L = np.sum(zij[probs>1.e-20]*np.log(probs[probs>1.e-20]))
    #L = np.sum(zij*np.log(np.clip(probs,1.e-10,1)))
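    #A numerically safer sketch (assuming scipy.special.logsumexp is available, and with
    #log_probs denoting the per-cluster log numerators computed before the np.exp above)
    #would evaluate the log-likelihood in log space instead:
    #    from scipy.special import logsumexp
    #    self.LL = np.sum(logsumexp(log_probs, axis=1))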