python类GMM的实例源码

test_gmm.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def test_verbose_second_level():
    """Smoke-test ``GMM.fit`` at ``verbose=2`` while capturing its output.

    Nothing is asserted about the captured text; the test passes as long as
    fitting finishes without raising.
    """
    # 30 five-dimensional samples, the first ten shifted by +2.
    samples = rng.randn(30, 5)
    samples[:10] += 2
    model = mixture.GMM(n_components=2, n_init=2, verbose=2)

    # Redirect stdout into a buffer; the original stream is always restored.
    saved_stdout, sys.stdout = sys.stdout, StringIO()
    try:
        model.fit(samples)
    finally:
        sys.stdout = saved_stdout
ProbabilisticModel.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 39 收藏 0 点赞 0 评论 0
def fit(self, data):
        """Fit a one-component GMM to `data` and keep its mean and covariance.

        Sets ``self.mean`` to the first row of ``means_``.  ``self.cov`` is
        ``covars_[0]`` for 'full', the whole ``covars_`` for 'tied', and
        ``np.diag(covars_[0])`` for any other ``self.cv_type``.
        """
        model = GMM(n_components=1, covariance_type=self.cv_type)
        model.fit(data)
        self.mean = model.means_[0]

        kind = self.cv_type
        if kind == 'full':
            self.cov = model.covars_[0]
            return
        if kind == 'tied':
            self.cov = model.covars_
            return
        self.cov = np.diag(model.covars_[0])
ProbabilisticModel.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fit(self, data):
        """Fit a full-covariance GMM to `data` and store its parameters.

        With ``self.n`` set, a single model with that many components is
        fitted.  Otherwise one model is fitted per component count in
        ``range(self.n_min, self.n_max)`` and the one picked by
        ``self._chosebestformetric(self.n_estimator, score)`` is kept; a
        score is only recorded when ``self.n_estimator == 'BIC'``.

        Sets ``self.means``, ``self.stds`` and ``self.weights``.  Note that
        ``self.stds`` receives the model's ``covars_`` as-is.
        """
        if self.n is not None:
            model = GMM(n_components=self.n, covariance_type='full')
            model.fit(data)
            self.means = model.means_
            self.stds = model.covars_
            self.weights = model.weights_
            return

        # One (means, covars, weights) triple per candidate component count.
        candidates = []
        score = []
        for n_components in range(self.n_min, self.n_max):
            model = GMM(n_components=n_components, covariance_type='full')
            model.fit(data)
            candidates.append((model.means_, model.covars_, model.weights_))
            if self.n_estimator == 'BIC':
                score.append(model.bic(data))

        i_best = self._chosebestformetric(self.n_estimator, score)
        self.means, self.stds, self.weights = candidates[i_best]
ProbabilisticModel.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def fit(self, data):
        """Fit a Gaussian mixture to `data` and store per-component parameters.

        When ``self.n`` is None, one model is fitted for every component
        count in ``range(self.n_min, self.n_max)`` and the one selected by
        ``self._chosebestformetric(self.n_estimator, score)`` is kept (a
        score is only recorded when ``self.n_estimator == 'BIC'``).
        Otherwise a single model with ``self.n`` components is fitted.

        Sets ``self.means``, ``self.covs`` and ``self.weights``.  ``self.covs``
        is laid out with one entry per component: 'full' covariances are
        stored as returned, a 'tied' covariance is repeated once per
        component, and for any other ``self.cv_type`` every row of
        ``covars_`` is passed through ``np.diag``.
        """
        def component_covs(model, n_components):
            # Bring covars_ into the one-entry-per-component layout.
            if self.cv_type == 'full':
                return model.covars_
            if self.cv_type == 'tied':
                return np.tile(model.covars_, (n_components, 1, 1))
            return np.array([np.diag(cv) for cv in model.covars_])

        if self.n is None:
            means = []
            covs = []
            weights = []
            score = []
            for n in range(self.n_min, self.n_max):
                gmm = GMM(n_components=n, covariance_type=self.cv_type)
                gmm.fit(data)
                means.append(gmm.means_)
                covs.append(component_covs(gmm, n))
                weights.append(gmm.weights_)
                if self.n_estimator == 'BIC':
                    score.append(gmm.bic(data))

            i_best = self._chosebestformetric(self.n_estimator, score)

            self.means = means[i_best]
            self.covs = covs[i_best]
            self.weights = weights[i_best]

        else:
            gmm = GMM(n_components=self.n, covariance_type=self.cv_type)
            gmm.fit(data)
            self.means = gmm.means_
            # The component count here is self.n; the loop variable `n` of the
            # search branch is not bound on this path.
            self.covs = component_covs(gmm, self.n)
            self.weights = gmm.weights_
ShaleVolume.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 15 收藏 0 点赞 0 评论 0
def GRsd(GRlog):
    """Return mean minus sqrt(covars_) of the lowest-mean GMM component.

    A three-component GMM is fitted to `GRlog` reshaped into a single column;
    the component is chosen with ``np.argmin`` over the fitted means.
    """
    model = GMM(n_components=3)
    model.fit(GRlog.reshape((-1, 1)))
    lowest = np.argmin(model.means_)
    centre = model.means_[lowest]
    spread = model.covars_[lowest] ** 0.5
    return centre - spread
ShaleVolume.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 14 收藏 0 点赞 0 评论 0
def GRsh(GRlog):
    """Return mean plus sqrt(covars_) of the highest-mean GMM component.

    A three-component GMM is fitted to `GRlog` reshaped into a single column;
    the component is chosen with ``np.argmax`` over the fitted means.
    """
    model = GMM(n_components=3)
    model.fit(GRlog.reshape((-1, 1)))
    highest = np.argmax(model.means_)
    centre = model.means_[highest]
    spread = model.covars_[highest] ** 0.5
    return centre + spread
ShaleVolume.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 15 收藏 0 点赞 0 评论 0
def SPsh(SPlog):
    """Return mean plus sqrt(covars_) of the highest-mean GMM component.

    A three-component GMM is fitted to `SPlog` reshaped into a single column;
    the component is chosen with ``np.argmax`` over the fitted means.
    """
    model = GMM(n_components=3)
    model.fit(SPlog.reshape((-1, 1)))
    highest = np.argmax(model.means_)
    centre = model.means_[highest]
    spread = model.covars_[highest] ** 0.5
    return centre + spread
ProbabilisticModel.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fit(self, data):
        """Estimate a single Gaussian from `data` via a one-component GMM.

        Stores the first row of ``means_`` in ``self.mean``.  ``self.cov``
        becomes ``covars_[0]`` ('full'), ``covars_`` itself ('tied'), or
        ``np.diag(covars_[0])`` for every other ``self.cv_type``.
        """
        estimator = GMM(n_components=1, covariance_type=self.cv_type)
        estimator.fit(data)
        self.mean = estimator.means_[0]

        cv_type = self.cv_type
        if cv_type == 'full':
            self.cov = estimator.covars_[0]
            return
        if cv_type == 'tied':
            self.cov = estimator.covars_
            return
        self.cov = np.diag(estimator.covars_[0])
clustering.py 文件源码 项目:PyFusionGUI 作者: SyntaxVoid 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def EM_GMM_clustering(instance_array, n_clusters=9, sin_cos = 0, number_of_starts = 10, show_covariances = 0, clim=None, covariance_type='diag'):
    '''Cluster the instances with an EM-fitted Gaussian mixture model.

    instance_array   : 2D array, one row per instance
    n_clusters       : number of mixture components
    sin_cos          : if exactly 1, each column is replaced by its cosine and
                       sine (interleaved cos, sin, cos, sin, ...) before fitting
    number_of_starts : passed to the GMM as n_init
    show_covariances : if true, show abs() of every component covariance matrix
    clim             : colour limits for those images; None means 0 to half of
                       each image's automatic upper limit
    covariance_type  : passed to the GMM

    Returns (cluster_assignments, cluster_details).  cluster_details holds the
    means, the per-dimension variances (diagonals of the covariances), the
    covariance matrices, 'BIC' and 'LL'; the first three keys carry an '_sc'
    suffix whenever sin_cos is truthy.
    '''
    print('starting EM-GMM algorithm from sckit-learn, k=%d, retries : %d, sin_cos = %d'%(n_clusters,number_of_starts,sin_cos))
    if sin_cos==1:
        print('  using sine and cosine of the phases')
        input_data = np.zeros((instance_array.shape[0],instance_array.shape[1]*2),dtype=float)
        # even columns hold the cosines, odd columns the sines
        input_data[:,::2]=np.cos(instance_array)
        input_data[:,1::2]=np.sin(instance_array)
    else:
        print('  using raw phases')
        input_data = instance_array
    gmm = mixture.GMM(n_components=n_clusters,covariance_type=covariance_type,n_init=number_of_starts)
    gmm.fit(input_data)
    cluster_assignments = gmm.predict(input_data)
    bic_value = gmm.bic(input_data)
    LL = np.sum(gmm.score(input_data))
    gmm_covars_tmp = np.array(gmm._get_covars())
    if show_covariances:
        fig, ax = make_grid_subplots(gmm_covars_tmp.shape[0], sharex = True, sharey = True)
        for i in range(gmm_covars_tmp.shape[0]):
            im = ax[i].imshow(np.abs(gmm_covars_tmp[i,:,:]),aspect='auto')
            print(im.get_clim())
            # identity test: clim may be an array-like, for which == None is ambiguous
            if clim is None:
                im.set_clim([0, im.get_clim()[1]*0.5])
            else:
                im.set_clim(clim)
        fig.subplots_adjust(hspace=0, wspace=0,left=0.05, bottom=0.05,top=0.95, right=0.95)
        fig.canvas.draw();fig.show()

    # per-dimension variances are the diagonals of the covariance matrices
    gmm_covars = np.array([np.diagonal(covar) for covar in gmm_covars_tmp])
    gmm_means = gmm.means_
    if sin_cos:
        cluster_details = {'EM_GMM_means_sc':gmm_means, 'EM_GMM_variances_sc':gmm_covars, 'EM_GMM_covariances_sc':gmm_covars_tmp,'BIC':bic_value, 'LL':LL}
    else:
        cluster_details = {'EM_GMM_means':gmm_means, 'EM_GMM_variances':gmm_covars, 'EM_GMM_covariances':gmm_covars_tmp, 'BIC':bic_value,'LL':LL}
    return cluster_assignments, cluster_details
clustering.py 文件源码 项目:PyFusionGUI 作者: SyntaxVoid 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def initialisation(self):
        '''Create the mu and kappa arrays and seed the membership matrix zij.

        self.start selects how zij (instances x clusters) is seeded:
          'k_means' : hard assignments from a k_means_clustering run
          'EM_GMM'  : hard assignments from an EM_GMM_clustering run
          otherwise : random memberships, each row normalised to sum to 1
        The first maximisation step is run afterwards.
        SH: 7June2013
        '''
        self.mu_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
        self.kappa_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
        self.LL_list = []
        self.zij = np.zeros((self.instance_array.shape[0],self.n_clusters),dtype=float)
        if self.start=='k_means':
            print('Initialising clusters using a fast k_means run')
            self.cluster_assignments, self.cluster_details = k_means_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 3, seed=self.seed)
            for i in set(self.cluster_assignments):
                self.zij[self.cluster_assignments==i,i] = 1
            print('finished initialising')
        elif self.start=='EM_GMM':
            self.cluster_assignments, self.cluster_details = EM_GMM_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 1)
            # the assignments live on self; there is no local of that name
            for i in set(self.cluster_assignments):
                self.zij[self.cluster_assignments==i,i] = 1
        else:
            print('going with random option')
            #need to get this to work better.....
            self.zij = np.random.random(self.zij.shape)
            #and normalise so each row adds up to 1....
            self.zij = self.zij / ((np.sum(self.zij,axis=1))[:,np.newaxis])
        self._EM_VMM_maximisation_step()
clustering.py 文件源码 项目:PyFusionGUI 作者: SyntaxVoid 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def initialisation(self):
        '''Create the mu and std arrays and seed the membership matrix zij.

        self.start selects how zij (instances x clusters) is seeded:
          'k_means' : hard assignments from a k_means_clustering run
          'EM_GMM'  : hard assignments from an EM_GMM_clustering run
          otherwise : random memberships, each row normalised to sum to 1
        The first maximisation step is run afterwards.
        SH: 7June2013
        '''
        self.mu_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
        self.std_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
        self.LL_list = []
        self.zij = np.zeros((self.instance_array.shape[0],self.n_clusters),dtype=float)
        if self.start=='k_means':
            print('Initialising clusters using a fast k_means run')
            self.cluster_assignments, self.cluster_details = k_means_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 3, seed=self.seed)
            for i in set(self.cluster_assignments):
                self.zij[self.cluster_assignments==i,i] = 1
            print('finished initialising')
        elif self.start=='EM_GMM':
            self.cluster_assignments, self.cluster_details = EM_GMM_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 1)
            # the assignments live on self; there is no local of that name
            for i in set(self.cluster_assignments):
                self.zij[self.cluster_assignments==i,i] = 1
        else:
            print('going with random option')
            #need to get this to work better.....
            self.zij = np.random.random(self.zij.shape)
            #and normalise so each row adds up to 1....
            self.zij = self.zij / ((np.sum(self.zij,axis=1))[:,np.newaxis])
        self._EM_GMM_maximisation_step()
clustering.py 文件源码 项目:PyFusionGUI 作者: SyntaxVoid 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def _initialisation(self):
        '''Create the mean and std arrays and seed the membership matrix zij.

        self.start selects how zij (n_instances x n_clusters) is seeded:
          'k_means' : hard assignments from a k_means_clustering run
          'EM_GMM'  : hard assignments from an EM_GMM_clustering run
          otherwise : random memberships, each row normalised to sum to 1
        The first maximisation step is run afterwards.
        SH: 7June2013
        '''
        param_shape = (self.n_clusters, self.n_dimensions)
        self.mean_list = np.ones(param_shape, dtype=float)
        self.std_list = np.ones(param_shape, dtype=float)
        self.LL_list = []
        self.zij = np.zeros((self.n_instances, self.n_clusters), dtype=float)
        #maybe only the random option is valid here.....
        if self.start in ('k_means', 'EM_GMM'):
            if self.start == 'k_means':
                print('Initialising clusters using a fast k_means run')
                seeded = k_means_clustering(self.input_data, n_clusters=self.n_clusters, sin_cos = 0, number_of_starts = 4, seed=self.seed)
            else:
                seeded = EM_GMM_clustering(self.input_data, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 1)
            self.cluster_assignments, self.cluster_details = seeded
            # every instance gets full membership of its assigned cluster
            for label in list(set(self.cluster_assignments)):
                members = self.cluster_assignments == label
                self.zij[members, label] = 1
        else:
            print('going with random option')
            #need to get this to work better.....
            raw = np.random.random(self.zij.shape)
            #and normalise so each row adds up to 1....
            row_totals = np.sum(raw, axis=1)
            self.zij = raw / row_totals[:, np.newaxis]
        self._EM_VMM_GMM_maximisation_step()
clustering.py 文件源码 项目:PyFusionGUI 作者: SyntaxVoid 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def EM_GMM_GMM_clustering(instance_array_amps, n_clusters=9, sin_cos = 0, number_of_starts = 10, show_covariances = 0, clim=None, covariance_type='diag', n_iter = 50):
    '''
    Cluster using a Gaussian for the real and imag part of the ratio of the complex value between adjacent channels
    Supposed to be for imaging diagnostics

    SRH: 18May2014
    '''
    print 'starting EM-GMM-GMM algorithm from sckit-learn, clusters=%d, retries : %d'%(n_clusters,number_of_starts)
    #tmp = np.zeros((instance_array_amps.shape[0], instance_array_amps.shape[1]-1),dtype=complex)
    #for i in range(1,instance_array_amps.shape[1]):
    #    tmp[:,i-1] = instance_array_amps[:,i]/instance_array_amps[:,i-1]
    #print 'ratio :', np.sum(np.abs(np.imag(instance_array_amps)))/np.sum(np.abs(np.real(instance_array_amps)))
    data_complex = instance_array_amps/np.sum(instance_array_amps,axis = 1)[:,np.newaxis]
    #data_complex = instance_array_amps/(instance_array_amps[:,2])[:,np.newaxis]
    #print 'hello..', instance_array_amps.shape
    input_data = np.hstack((np.real(data_complex), np.real(data_complex)))
    #k_means_cluster_assignments, k_means_cluster_details = k_means_clustering(input_data, n_clusters=n_clusters, sin_cos = 1, number_of_starts = 3,)
    #print k_means_cluster_assignments
    #input_data = np.hstack((np.abs(data_complex),(np.abs(data_complex))))
    n_dim = data_complex.shape[1]
    #print n_clusters
    gmm = mixture.GMM(n_components = n_clusters, covariance_type = covariance_type, n_init = number_of_starts, n_iter = n_iter,)
    gmm.fit(input_data)
    cluster_assignments = gmm.predict(input_data)
    bic_value = gmm.bic(input_data)
    LL = np.sum(gmm.score(input_data))

    #Extract the means, variances and covariances
    gmm_covars = np.array(gmm._get_covars())
    gmm_vars = np.array([np.diagonal(i) for i in gmm._get_covars()])
    gmm_vars_re, gmm_vars_im = np.hsplit(gmm_vars,2)
    gmm_covars_re = np.array([i[0:n_dim,0:n_dim] for i in gmm._get_covars()])
    gmm_covars_im = np.array([i[n_dim:,n_dim:] for i in gmm._get_covars()])
    gmm_means = gmm.means_
    gmm_means_re, gmm_means_im = np.hsplit(gmm_means, 2)
    #Bundle up the answer
    cluster_details = {'EM_GMM_means':gmm_means, 'EM_GMM_variances':gmm_vars, 'EM_GMM_covariances':gmm_covars, 'EM_GMM_means_re':gmm_means_re, 'EM_GMM_variances_re':gmm_vars_re, 'EM_GMM_covariances_re':gmm_covars_re,'EM_GMM_means_im':gmm_means_im, 'EM_GMM_variances_im':gmm_vars_im, 'EM_GMM_covariances_im':gmm_covars_im,'BIC':bic_value,'LL':LL}
    print 'EM_GMM_GMM Converged: ', gmm.converged_

    return cluster_assignments, cluster_details
models.py 文件源码 项目:bof-aed 作者: rgrzeszi 项目源码 文件源码 阅读 15 收藏 0 点赞 0 评论 0
def train(self, datadict, labels, rand_features=True):
        '''
        Fits one diagonal-covariance GMM per class and merges them into a super codebook.
        @param datadict: Dictionary keyed by class label.
        Each value is a list of feature matrices, one per window [frames x feature]
        @param labels: the labels of the datadict in a given order
        @param rand_features: Shuffles the samples before running the GMM
        '''
        self.criterion = []

        # All features of all classes in one matrix; the normalisation
        # statistics are derived from it and kept on self.norm.
        allfeatures = np.vstack([np.vstack(windows) for windows in datadict.values()])
        self.norm = FeatureNormalizer()
        self.norm.setup(allfeatures)

        # Split the vocabulary evenly over the classes (at least one entry per
        # class), then store the total that this split actually yields.
        ncl = len(labels)
        vocab_size_per_cl = max(1, self.vocab_size / ncl)
        self.vocab_size = vocab_size_per_cl * ncl

        # One GMM per class label
        self.labels = labels
        self.gmms = {}
        for label in labels:
            feats = np.vstack(datadict[label])
            if rand_features:
                np.random.shuffle(feats)
            norm_features = self.norm.normalize(feats) if self.normalize else feats
            print >> sys.stderr, ("Training a GMM for label %s, using scipy and data of shape %s"
                                 % (label, str(np.shape(norm_features))))
            class_gmm = GMM(vocab_size_per_cl, covariance_type='diag', n_iter=100)
            class_gmm.fit(norm_features)
            self.gmms[label] = class_gmm

        # Merge the per-class GMMs; the argument is the feature dimension
        self.compute_super_codebook(allfeatures.shape[1])
data.py 文件源码 项目:kaggle-Kobe-Bryant-Shot-Selection 作者: shiba24 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def preproc(self):
        """Add the derived feature columns to ``self.df`` in place.

        Columns added:
          * time flags computed from minutes/seconds remaining and period
          * ``away`` / ``homeGame`` from the presence of '@' in ``matchup``
          * ``secondsFromStart`` / ``secondsFromGameStart``
          * ``shotLocationCluster``: component index of a 13-component
            full-covariance GMM fitted on (loc_x, loc_y)
          * ``game_year`` / ``game_month`` / ``game_day`` sliced from the
            ``game_date`` string (characters [:4], [5:7] and [-2:])
          * ``*_num`` integer codes for action_type, combined_shot_type,
            opponent, game_id (arbitrary but consistent order) and season
            (sorted order)
        Finally ``shot_distance`` is capped at 45.  Nothing is returned.
        """
        df = self.df

        def encode(column, ordered=False):
            # Replace each distinct value by its position in the list of
            # distinct values; a dict lookup avoids a list.index scan per row.
            levels = list(set(df[column].tolist()))
            if ordered:
                levels.sort()
            codes = {level: code for code, level in enumerate(levels)}
            return df[column].map(codes)

        df["time_remaining"] = df["minutes_remaining"] * 60 + df["seconds_remaining"]
        df['last_5_sec'] = df['time_remaining'] < 5
        df['latter_half'] = df['time_remaining'] < 360
        df['first_period'] = df['period'] == 1
        df['latter_period'] = df['period'] > 2
        df['last_period'] = df['period'] == 4
        df['last_quarter'] = df['time_remaining'] < 180

        threshold = 3
        anomaly = 14
        df['last_moment'] = (df['time_remaining'] < threshold) | (df['time_remaining'] == anomaly)
        df['away'] = df.matchup.str.contains('@')
        df['secondsFromStart'] = 60 * (11 - df['minutes_remaining']) + (60 - df['seconds_remaining'])
        # Regulation periods last 12 minutes, later periods 5 minutes.
        df['secondsFromGameStart'] = (df['period'] <= 4).astype(int) * (df['period'] - 1) * 12 * 60 + (df['period'] > 4).astype(int) * ((df['period'] - 4) * 5 * 60 + 3 * 12 * 60) + df['secondsFromStart']

        numGaussians = 13
        gaussianMixtureModel = mixture.GMM(n_components=numGaussians, covariance_type='full',
                                           params='wmc', init_params='wmc',
                                           random_state=1, n_init=3,  verbose=0)
        shot_xy = df.loc[:, ['loc_x', 'loc_y']]
        gaussianMixtureModel.fit(shot_xy)
        df['shotLocationCluster'] = gaussianMixtureModel.predict(shot_xy)
        df['homeGame'] = df['matchup'].apply(lambda x: 1 if (x.find('@') < 0) else 0)

        # Slicing through .str keeps the frame's own index, so this does not
        # depend on the index being 0..n-1.
        df["game_year"] = df["game_date"].str[:4].astype(int)
        df["game_month"] = df["game_date"].str[5:7].astype(int)
        df["game_day"] = df["game_date"].str[-2:].astype(int)

        df["action_type_num"] = encode("action_type")
        df["combined_shot_type_num"] = encode("combined_shot_type")
        df["opponent_num"] = encode("opponent")
        df["game_id_num"] = encode("game_id")
        df["season_num"] = encode("season", ordered=True)

        # A single .loc assignment writes to the frame itself; the chained
        # df[col][mask] = v form may write to a temporary copy instead.
        df.loc[df["shot_distance"] > 45, "shot_distance"] = 45

        # del self.df["team_id"], self.df["team_name"], self.df["game_event_id"], self.df["lat"], self.df["lon"]
        # return self.df
test_gmm.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def check_positive_definite_covars(covariance_type):
    r"""Check that fitted covariances stay positive definite.

    Round-off errors accumulated while learning can yield covariance
    matrices that are not positive definite. In particular, computing

    .. math:: C = (\sum_i w_i  x_i x_i^T) - \mu \mu^T

    rather than the mathematically equivalent

    .. math:: C = \sum_i w_i (x_i - \mu)(x_i - \mu)^T

    was observed to raise a ``LinAlgError`` when fitting a ``GMM`` with full
    covariance matrices and a fixed mean.

    This check guards against a later optimization bringing the problem
    back.
    """
    rng = np.random.RandomState(1)
    # Two unbalanced 2-d components (weights 0.9 and 0.1): the last ten of
    # the hundred points are shifted by (3, 3).
    samples = rng.randn(100, 2)
    samples[-10:] += (3, 3)

    gmm = mixture.GMM(2, params="wc", covariance_type=covariance_type,
                      min_covar=1e-3)

    # Non-regression test for issue #2640. This call used to trigger:
    # numpy.linalg.linalg.LinAlgError: 2-th leading minor not positive definite
    gmm.fit(samples)

    if covariance_type in ("diag", "spherical"):
        # Stored entries must all be strictly positive.
        assert_greater(gmm.covars_.min(), 0)
        return

    # 'tied' shares one matrix between components; wrap it so that both
    # layouts can be walked the same way.
    matrices = [gmm.covars_] if covariance_type == "tied" else gmm.covars_
    for matrix in matrices:
        assert_greater(np.linalg.det(matrix), 0)
Clustering.py 文件源码 项目:GRIPy 作者: giruenf 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def expectation_maximization(data, nc, cv_type='full', req_info=None):
    """Cluster `data` with a GMM and optionally report details of the fit.

    Parameters
    ----------
    data : samples to cluster, passed to ``GMM.fit`` / ``GMM.predict``.
    nc : number of mixture components.
    cv_type : covariance type passed to the GMM.
    req_info : ``None`` (no extra info), ``'all'``, or a collection naming any
        of 'aic', 'bic', 'converged', 'weights', 'means', 'covars',
        'silhouette', 'proba'.

    Returns
    -------
    (labels, info) : the predicted component of every sample and a dict with
        the requested entries.  ``info['covars']`` always holds one matrix
        per component: 'full' covariances are returned as stored, a 'tied'
        covariance is repeated `nc` times, and for any other type each row
        of ``covars_`` is passed through ``np.diag``.  ``info['proba']`` is
        the transpose of ``predict_proba``.
    """
    gmm = GMM(n_components=nc, covariance_type=cv_type, thresh=1.0E-4, n_init=10)
    gmm.fit(data)

    labels = gmm.predict(data)

    if req_info == 'all':
        req_info = ['aic', 'bic', 'converged', 'weights', 'means', 'covars',
                    'silhouette', 'proba']
    elif req_info is None:
        req_info = []

    info = {}
    if 'aic' in req_info:
        info['aic'] = gmm.aic(data)
    if 'bic' in req_info:
        info['bic'] = gmm.bic(data)
    if 'converged' in req_info:
        info['converged'] = gmm.converged_
    if 'weights' in req_info:
        info['weights'] = gmm.weights_
    if 'means' in req_info:
        info['means'] = gmm.means_
    if 'covars' in req_info:
        if cv_type == 'full':
            info['covars'] = gmm.covars_
        elif cv_type == 'tied':
            info['covars'] = np.tile(gmm.covars_, (nc, 1, 1))
        else:
            # Build the stack from the matrices themselves. Sizing it from
            # covars_.shape gave (nc, nc, n_features), which cannot hold the
            # square np.diag results unless nc equals the feature count.
            info['covars'] = np.array([np.diag(cv) for cv in gmm.covars_])
    if 'silhouette' in req_info:
        info['silhouette'] = metrics.silhouette_score(data, labels)
    if 'proba' in req_info:
        info['proba'] = gmm.predict_proba(data).T

    return labels, info
clustering.py 文件源码 项目:PyFusionGUI 作者: SyntaxVoid 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def plot_dimension_histograms_GMM_GMM(self,pub_fig = 0, filename='plot_dim_hist.pdf',specific_dimensions = None, extra_txt_labels = '', label_loc = [-2,1.5], ylim = None):
        '''For each dimension in the data set, plot the histogram of the real and imag part of the measurements
        overlay the GMM's - used for the GMM-GMM clustering method

        SRH: 18May2014
        '''
        suptitle = self.settings.__str__().replace("'",'').replace("{",'').replace("}",'')
        cluster_mu = self.cluster_details['EM_GMM_means_re'] + 1j*self.cluster_details['EM_GMM_means_im']
        cluster_sigma = self.cluster_details['EM_GMM_variances_re'] + 1j*self.cluster_details['EM_GMM_variances_im']
        dimensions = cluster_mu.shape[1]

        instance_array_amps = self.feature_obj.misc_data_dict['mirnov_data']
        tmp = np.zeros((instance_array_amps.shape[0], instance_array_amps.shape[1]-1),dtype=complex)
        tmp = instance_array_amps/np.sum(instance_array_amps, axis = 1)[:,np.newaxis]
        #for i in range(1,instance_array_amps.shape[1]): tmp[:,i-1] = instance_array_amps[:,i]/instance_array_amps[:,i-1]
        if specific_dimensions == None: specific_dimensions = range(dimensions)
        fig_re, ax_re = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
        fig_im, ax_im = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
        for i,dim in enumerate(specific_dimensions):
            ax_re[i].hist(np.real(tmp[:,dim]), bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])
            ax_im[i].hist(np.imag(tmp[:,dim]), bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])

        if self.cluster_assignments!=None: cluster_list = list(set(self.cluster_assignments))
        x = np.linspace(-np.pi, np.pi, 300)
        cluster_prob_list = []
        for cluster in cluster_list:
            cluster_prob_list.append(float(np.sum(self.cluster_assignments==cluster))/float(len(self.cluster_assignments)))
        for i, dimension in enumerate(specific_dimensions):
            for ax_cur, op in zip([ax_re,ax_im],[np.real, np.imag]):
                cluster_sum = x*0
                for cluster, cluster_prob in zip(cluster_list, cluster_prob_list):
                    Z_EM = cluster_prob * norm(loc=op(cluster_mu[cluster][dimension]), scale=np.sqrt(op(cluster_sigma[cluster][dimension]))).pdf(x)
                    cluster_sum += Z_EM
                    tmp = ax_cur[i].plot(x,Z_EM,'-',linewidth=0.8)
                tmp = ax_cur[i].plot(x,cluster_sum,'-',linewidth=2)
                print '{area},'.format(area = np.sum(cluster_sum*(x[1]-x[0]))),
                ax_cur[i].text(label_loc[0], label_loc[1],r'$\Delta \psi_%d$ '%(dimension+1,) + extra_txt_labels, fontsize = 8)#,bbox=dict(facecolor='white', alpha=0.5))
                ax_cur[i].locator_params(nbins=7)
        print ''
        for ax_cur, fig_cur in zip([ax_re, ax_im],[fig_re, fig_im]):
            ax_cur[-1].set_xlim([-np.pi,np.pi])
            ax_cur[-1].set_ylim([0,1.3])
            fig_cur.subplots_adjust(hspace=0, wspace=0,left=0.05, bottom=0.05,top=0.95, right=0.95)
            fig_cur.suptitle(suptitle.replace('_','\char`_'),fontsize = 8)
            fig_cur.canvas.draw(); fig_cur.show()
clustering.py 文件源码 项目:PyFusionGUI 作者: SyntaxVoid 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def plot_dimension_histograms_VMM_GMM(self,pub_fig = 0, filename='plot_dim_hist.pdf',specific_dimensions = None, extra_txt_labels = '', label_loc = [-2,1.5], ylim = None):
        '''For each dimension in the data set, plot the histogram of the real and imag part of the measurements
        overlay the GMM's - used for the GMM-GMM clustering method

        SRH: 18May2014
        '''
        suptitle = self.settings.__str__().replace("'",'').replace("{",'').replace("}",'')
        cluster_GMM_mu = self.cluster_details['EM_GMM_means']
        cluster_GMM_sigma = self.cluster_details['EM_GMM_std']
        dimensions = cluster_GMM_mu.shape[1]

        instance_array_amps = self.feature_obj.misc_data_dict['mirnov_data']
        tmp = np.zeros((instance_array_amps.shape[0], instance_array_amps.shape[1]-1),dtype=complex)
        for i in range(1,instance_array_amps.shape[1]): tmp[:,i-1] = instance_array_amps[:,i]/instance_array_amps[:,i-1]
        if specific_dimensions == None: specific_dimensions = range(dimensions)
        fig_ang, ax_ang = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
        fig_abs, ax_abs = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
        amp_vals = np.abs(tmp)
        amp_vals[np.angle(tmp)<0]*= (-1)
        for i,dim in enumerate(specific_dimensions):
            ax_abs[i].hist(amp_vals[:,dim], bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])
            ax_ang[i].hist(np.angle(tmp[:,dim]), bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])

        if self.cluster_assignments!=None: cluster_list = list(set(self.cluster_assignments))
        x = np.linspace(-np.pi, np.pi, 300)
        cluster_prob_list = []
        for cluster in cluster_list:
            cluster_prob_list.append(float(np.sum(self.cluster_assignments==cluster))/float(len(self.cluster_assignments)))
        for i, dimension in enumerate(specific_dimensions):
            #for ax_cur, op in zip([ax_re,ax_im],[np.real, np.imag]):
            #for ax_cur, op in zip([ax_ang,ax_abs],[np.angle, np.abs]):
            for ax_cur, op in zip([ax_abs],[np.abs]):
                cluster_sum = x*0
                for cluster, cluster_prob in zip(cluster_list, cluster_prob_list):
                    Z_EM = cluster_prob * norm(loc=cluster_GMM_mu[cluster][dimension], scale=cluster_GMM_sigma[cluster][dimension]).pdf(x)
                    cluster_sum += Z_EM
                    tmp = ax_cur[i].plot(x,Z_EM,'-',linewidth=0.8)
                tmp = ax_cur[i].plot(x,cluster_sum,'-',linewidth=2)
                print '{area},'.format(area = np.sum(cluster_sum*(x[1]-x[0]))),
                ax_cur[i].text(label_loc[0], label_loc[1],r'$\Delta \psi_%d$ '%(dimension+1,) + extra_txt_labels, fontsize = 8)#,bbox=dict(facecolor='white', alpha=0.5))
                ax_cur[i].locator_params(nbins=7)
        print ''
        #for ax_cur, fig_cur in zip([ax_re, ax_im],[fig_re, fig_im]):
        for ax_cur, fig_cur in zip([ax_ang, ax_abs],[fig_ang, fig_abs]):
            ax_cur[-1].set_xlim([-np.pi,np.pi])
            ax_cur[-1].set_ylim([0,1.3])
            fig_cur.subplots_adjust(hspace=0, wspace=0,left=0.05, bottom=0.05,top=0.95, right=0.95)
            fig_cur.suptitle(suptitle.replace('_','\char`_'),fontsize = 8)
            fig_cur.canvas.draw(); fig_cur.show()


问题


面经


文章

微信
公众号

扫码关注公众号