def _EM_GMM_expectation_step(self):
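    """E-step for a diagonal-covariance Gaussian mixture: fill self.probs with
    the weighted component densities of every instance, then normalise each
    row into the responsibilities self.zij."""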
    # One row per instance, one column of weighted densities per cluster
    self.probs = np.zeros((self.instance_array.shape[0], self.n_clusters), dtype=float)
    # instance_array_c and instance_array_s hold the precomputed cos/sin of the
    # data; the (commented-out) von Mises variant below uses them to speed up
    # cos(instance_array - mu) via the identity
    # cos(a - b) = cos(a)*cos(b) + sin(a)*sin(b),
    # removing the need to recalculate cos(a) and sin(a) on every iteration.
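    # A minimal sketch of that precomputation (an assumption: done once before
    # the EM loop, and only relevant if the von Mises variant is re-enabled):
    #   self.instance_array_c = np.cos(self.instance_array)
    #   self.instance_array_s = np.sin(self.instance_array)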
    for cluster_ident, (mu_tmp, std_tmp, p_hat) in enumerate(zip(self.mu_list, self.std_list, self.pi_hat)):
        # von Mises normalisation (kept for reference; kappa_tmp is the
        # concentration parameter of the commented-out variant below):
        # norm_fac_exp = -self.n_dimensions*np.log(2.*np.pi) - np.sum(np.log(spec.iv(0, kappa_tmp)))
        # Log of the Gaussian normalisation constant: -(d/2)*log(2*pi) - sum(log(sigma))
        norm_fac_exp = -0.5*self.n_dimensions*np.log(2.*np.pi) - np.sum(np.log(std_tmp))
        # von Mises variant of the exponent (kept for reference):
        # pt1 = kappa_tmp * (self.instance_array_c*np.cos(mu_tmp) + self.instance_array_s*np.sin(mu_tmp))
        # Gaussian log-density exponent per dimension: -(x - mu)^2 / (2*sigma^2)
        pt1 = -(self.instance_array - mu_tmp)**2/(2.*(std_tmp**2))
        # Weighted component density for this cluster, for every instance
        self.probs[:, cluster_ident] = p_hat * np.exp(np.sum(pt1, axis=1) + norm_fac_exp)
    # Normalise across clusters: zij[i, k] = p(cluster k | instance i)
    prob_sum = np.sum(self.probs, axis=1)[:, np.newaxis]
    self.zij = self.probs/prob_sum
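    # Note (a sketch, not part of the original): if the exponentials underflow
    # for every cluster, prob_sum is zero and zij becomes NaN. A log-space
    # formulation is more robust, e.g. with scipy.special.logsumexp:
    #   log_p = np.log(self.pi_hat) + log_densities  # log_densities: hypothetical (n, k) array
    #   self.zij = np.exp(log_p - logsumexp(log_p, axis=1, keepdims=True))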
    # Calculating the log-likelihood is quite expensive and not really necessary
    # unless comparing different techniques and/or checking for convergence.
    # The masking/clipping below prevents problems with taking the log of a
    # very small number:
    # L = np.sum(self.zij[self.probs > 1.e-20]*np.log(self.probs[self.probs > 1.e-20]))
    # L = np.sum(self.zij*np.log(np.clip(self.probs, 1.e-10, 1)))
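# A minimal runnable sketch (an addition, not in the original; the helper name
# _EM_GMM_log_likelihood is hypothetical): the observed-data log-likelihood can
# be read directly off the per-instance mixture sums from the E-step above,
# avoiding the zij*log(probs) form and its log-of-zero issues.
def _EM_GMM_log_likelihood(self):
    # self.probs holds pi_k * N(x_i | mu_k, sigma_k); summing over clusters
    # gives the mixture density p(x_i) for every instance.
    prob_sum = np.sum(self.probs, axis=1)
    # Clipping guards against log(0) for instances far from every cluster.
    return np.sum(np.log(np.clip(prob_sum, 1.e-300, None)))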
#############################################################################
#####################Plotting functions#####################################