#Assumes numpy (as np) and scipy.special (as spec) are imported at module level
def _EM_VMM_GMM_expectation_step(self):
    #Reset the responsibility array (could probably be removed by updating zij in place)
    self.probs = np.zeros_like(self.zij)
    #instance_array_c and instance_array_s are cached copies of cos(instance_array) and
    #sin(instance_array), used to speed up evaluating cos(instance_array - mu) via the
    #trig identity cos(a-b) = cos(a)cos(b) + sin(a)sin(b); this removes the need to
    #constantly recalculate cos and sin of the data (see the sketch below)
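    #For reference, a minimal sketch of the caching this relies on (assuming
    #self.instance_array holds the phase data; computed once, outside the EM loop):
    #    self.instance_array_c = np.cos(self.instance_array)
    #    self.instance_array_s = np.sin(self.instance_array)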
    for cluster_ident, (mu_tmp, kappa_tmp, mean_tmp, std_tmp, p_hat) in enumerate(
            zip(self.mu_list, self.kappa_list, self.mean_list, self.std_list, self.pi_hat)):
        #log normalisation constant of the von Mises product: -D*log(2*pi) - sum(log(I0(kappa)))
        norm_fac_exp_VMM = -self.n_dimensions*np.log(2.*np.pi) - np.sum(np.log(spec.iv(0,kappa_tmp)))
        #log normalisation constant of the Gaussian product: -(D/2)*log(2*pi) - sum(log(sigma))
        norm_fac_exp_GMM = -self.n_dimensions_amps/2.*np.log(2.*np.pi) - np.sum(np.log(std_tmp))
        #Gaussian log-density exponent, per dimension
        pt1_GMM = -(self.instance_array_amps - mean_tmp)**2/(2.*(std_tmp**2))
        #As noted above, instance_array_c and instance_array_s come from the trig identity;
        #mu_tmp has far fewer elements than those precalculated arrays, so only cos(mu_tmp)
        #and sin(mu_tmp) need to be evaluated here, which speeds this step up significantly
        pt1_VMM = kappa_tmp * (self.instance_array_c*np.cos(mu_tmp) + self.instance_array_s*np.sin(mu_tmp))
        #Mixture-weighted likelihood of each instance under this cluster:
        #pi_k * exp(log VMM density + log GMM density)
        self.probs[:,cluster_ident] = p_hat * np.exp(np.sum(pt1_VMM,axis=1) + norm_fac_exp_VMM +
                                                     np.sum(pt1_GMM,axis=1) + norm_fac_exp_GMM)
    #Normalise so that each row of responsibilities sums to 1
    prob_sum = np.sum(self.probs, axis=1)[:,np.newaxis]
    self.zij = self.probs/prob_sum
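    #If underflow ever produces an all-zero row, the division above yields NaNs; a
    #minimal guard (an assumption, not part of the original implementation) would be:
    #    self.zij = self.probs/np.clip(prob_sum, 1.e-300, None)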
    #Calculating the log-likelihood is quite expensive and not really necessary unless
    #comparing different techniques and/or checking for convergence. The masking and
    #clipping below prevent problems with taking the log of a very small number:
    #L = np.sum(zij[probs>1.e-20]*np.log(probs[probs>1.e-20]))
    #L = np.sum(zij*np.log(np.clip(probs,1.e-10,1)))
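    #A numerically safer sketch (assuming scipy.special.logsumexp is available, and with
    #log_probs denoting the per-cluster log numerators computed before the np.exp above)
    #would evaluate the log-likelihood in log space instead:
    #    from scipy.special import logsumexp
    #    self.LL = np.sum(logsumexp(log_probs, axis=1))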