def _build_gmm(self, data):
"""
Build gmm from data
"""
st = time.time()
self.gmm = GMM(n_components=self.K, covariance_type='diag')
self.gmm.fit(data)
# Setup codebook for closest center lookup
self.codebook = self.gmm.means_
print 'Vocab construction from data %s (%s KB, %s) => GMM %s took %5.3f s' % \
(data.shape, data.nbytes / 1024, data.dtype, self.gmm.means_.shape, time.time() - st)
print 'GMM: %s' % ('GOOD' if np.isfinite(self.gmm.means_).all() else 'BAD')
# Save codebook, and index
self.index_codebook()
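# A minimal standalone sketch of the vocabulary-building step above, outside its class.
# It assumes a legacy scikit-learn (< 0.20) that still ships sklearn.mixture.GMM; the
# `descriptors` array is illustrative dummy data, not part of the original code.
import time
import numpy as np
from sklearn.mixture import GMM

descriptors = np.random.rand(5000, 64)     # e.g. 5000 local descriptors of dimension 64
st = time.time()
vocab = GMM(n_components=64, covariance_type='diag')
vocab.fit(descriptors)
codebook = vocab.means_                    # one row per visual word, used for lookups
print 'GMM vocabulary %s built in %.3f s, finite: %s' % (
    codebook.shape, time.time() - st, np.isfinite(codebook).all())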
def find_centers_gmm(data, gridpoints, n_components=2):
r""" Provided 1D data and a grid of points, return the indices of the points closest to
the centers of an assumed n-modal distribution behind "data"
data : 1D data ndarray, of shape (N, 1)
gridpoints: 1D gridpoints
returns: idxs, igmm
idxs: 1D ndarray
INDICES of gridpoints that are closest to the centers of the gaussians
igmm: gaussian mixture model (sklearn type)
tested = true
"""
igmm = _GMM(n_components=n_components)
igmm.fit(data)
return _np.abs(gridpoints-_np.sort(igmm.means_, 0)).argmin(1), igmm
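# Hedged usage sketch for find_centers_gmm: fit a two-component GMM to synthetic bimodal
# 1D data and recover the grid points closest to the two modes. It reuses the module's
# import aliases (_np for numpy, _GMM for the legacy sklearn.mixture.GMM) assumed above.
import numpy as _np

data = _np.concatenate((_np.random.normal(-2, 0.3, 500),
                        _np.random.normal(3, 0.5, 500))).reshape(-1, 1)
gridpoints = _np.linspace(-5, 5, 200)
idxs, igmm = find_centers_gmm(data, gridpoints, n_components=2)
print(gridpoints[idxs])    # approximately [-2.  3.]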
def parserArguments(parser):
parser.add_argument('--max_descriptors', nargs='*',
type=int, default=[150000],
help='load maximum descriptors')
parser.add_argument('--num_clusters', type=int, default=100,\
help='number of cluster-centers = size of vocabulary')
parser.add_argument('--vocabulary_filename', default='ubm',\
help='write vocabulary to this file')
parser.add_argument('--method', default='gmm',\
choices=['gmm'],
help=('method for clustering'))
parser.add_argument('--iterations', type=int, default=100,\
help=' number of iterations (if gmm, this is the gmm '
'part, not the kmeans-initialization part)')
    parser.add_argument('--update', default='wmc',\
                        help='which parts of the GMM to update: w:weights, m:means, c:covars')
parser.add_argument('--covar_type', default='diag',
choices=['full','diag'],
help='covariance type for gmm')
return parser
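# Hedged usage sketch: attach the options defined above to a fresh ArgumentParser and
# parse an example command line. The argument values are illustrative only.
import argparse

parser = parserArguments(argparse.ArgumentParser(description='GMM vocabulary training'))
args = parser.parse_args(['--num_clusters', '64', '--covar_type', 'diag',
                          '--vocabulary_filename', 'ubm_64'])
print(args)    # Namespace with num_clusters=64, iterations=100, update='wmc', ...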
def test_GMM_attributes():
n_components, n_features = 10, 4
covariance_type = 'diag'
g = mixture.GMM(n_components, covariance_type, random_state=rng)
weights = rng.rand(n_components)
weights = weights / weights.sum()
means = rng.randint(-20, 20, (n_components, n_features))
assert_true(g.n_components == n_components)
assert_true(g.covariance_type == covariance_type)
g.weights_ = weights
assert_array_almost_equal(g.weights_, weights)
g.means_ = means
assert_array_almost_equal(g.means_, means)
covars = (0.1 + 2 * rng.rand(n_components, n_features)) ** 2
g.covars_ = covars
assert_array_almost_equal(g.covars_, covars)
assert_raises(ValueError, g._set_covars, [])
assert_raises(ValueError, g._set_covars,
np.zeros((n_components - 2, n_features)))
assert_raises(ValueError, mixture.GMM, n_components=20,
covariance_type='badcovariance_type')
def test_aic():
# Test the aic and bic criteria
n_samples, n_dim, n_components = 50, 3, 2
X = rng.randn(n_samples, n_dim)
SGH = 0.5 * (X.var() + np.log(2 * np.pi)) # standard gaussian entropy
for cv_type in ['full', 'tied', 'diag', 'spherical']:
g = mixture.GMM(n_components=n_components, covariance_type=cv_type,
random_state=rng, min_covar=1e-7)
g.fit(X)
aic = 2 * n_samples * SGH * n_dim + 2 * g._n_parameters()
bic = (2 * n_samples * SGH * n_dim +
np.log(n_samples) * g._n_parameters())
bound = n_dim * 3. / np.sqrt(n_samples)
assert_true(np.abs(g.aic(X) - aic) / n_samples < bound)
assert_true(np.abs(g.bic(X) - bic) / n_samples < bound)
def _EM_VMM_GMM_maximisation_step(self):
'''Maximisation step SH : 7June2013
'''
self.pi_hat = np.sum(self.zij,axis=0)/float(self.n_instances)
self.mu_list_old = self.mu_list.copy()
self.kappa_list_old = self.kappa_list.copy()
self.mean_list_old = self.mean_list.copy()
self.std_list_old = self.std_list.copy()
for cluster_ident in range(self.n_clusters):
inst_tmp = (self.instance_array_complex.T * self.zij[:,cluster_ident]).T
N= np.sum(self.zij[:,cluster_ident])
        # Calculate the best fit for this cluster, all dimensions at once, using the new approximations
#VMM part
self.kappa_list[cluster_ident,:], self.mu_list[cluster_ident,:], scale_fit1 = EM_VMM_calc_best_fit(inst_tmp, lookup=self.bessel_lookup_table, N=N)
#GMM part
self.std_list[cluster_ident,:], self.mean_list[cluster_ident,:] = EM_GMM_calc_best_fit(self.instance_array_amps, self.zij[:,cluster_ident])
    # Clip parameters to prevent numerically pathological values
self.kappa_list = np.clip(self.kappa_list,0.1,300)
self.std_list = np.clip(self.std_list,0.001,300)
self._EM_VMM_GMM_check_convergence()
def gmm_cv(fv_train,target_train,fv_test,target_test):
    ####---- cross-validation on the training set; grid-search the best parameters for the GMM
n_classes = len(np.unique(target_train))
n_components_max = 7
n_components_range = range(1, n_components_max)
cv_types = ['spherical', 'tied', 'diag', 'full']
for cv_type in cv_types:
for n_components in n_components_range:
# Fit a mixture of Gaussians with EM
gmm = mixture.GMM(n_components=n_components, covariance_type=cv_type)
gmm.means_ = np.array([fv_train[target_train == i].mean(axis=0)
for i in xrange(n_classes)])
            gmm.fit(fv_train)
            target_train_pred = gmm.predict(fv_train)
train_accuracy = np.mean(target_train_pred == target_train) * 100
print cv_type, n_components, ' Train accuracy: %.1f' % train_accuracy
def compute_super_codebook(self, feature_size):
'''
Merges the GMMs that were computed for each class
into one GMM (super codebook).
@param feature_size: dimensionality of a feature vector
'''
# Get number of classes
ncl = len(self.labels)
# Compute vocabsize per class
vocab_size_per_cl = max(1, self.vocab_size / ncl)
# Create GMM for overall repr
    print >> sys.stderr, 'Using GMM with', self.vocab_size, 'components and', vocab_size_per_cl, 'per class.'
self.gmm = GMM(self.vocab_size, n_iter=1, params='w', covariance_type='diag')
# Init by fitting with ones
self.gmm.fit(np.ones((self.vocab_size, feature_size)))
# Overwrite values from supervised codebook GMM by class GMMs
index = 0
for _, sgmm in self.gmms.iteritems():
vocab_size_per_cl = len(sgmm.means_)
self.gmm.means_ [index:index + vocab_size_per_cl, :] = sgmm.means_
self.gmm.covars_ [index:index + vocab_size_per_cl, :] = sgmm.covars_
index += vocab_size_per_cl
# Set uniform GMM weights
self.gmm.weights_ = np.ones(self.vocab_size) / float(self.vocab_size)
return
def classify_proba(self, features):
'''
    Return the per-component GMM posterior probabilities for the given features
'''
return self.gmm.predict_proba(features)
def score_samples(self, features):
'''
Return the GMM scores for the features
'''
return self.gmm.score_samples(features)
def train(self, datadict, labels):
'''
Trains the GMM Model with supervised codebooks
and soft quantization
'''
super(BoFModelSuper, self).train(datadict, labels, rand_features=True)
self.featuresize = self.vocab_size
return
def gmmRemovingOutlierForClassifier():
"""
    Use a GMM to remove outliers from the training data; the filtered arrays
    are saved to inputClf_GMMOutlierRemoval/.
    :return: None
"""
# load data
X_train = np.load('inputClf_small/X_train.npy')
y_train = np.load('inputClf_small/y_train.npy')
y_train_price = np.load('inputClf_small/y_train_price.npy')
# classifier initialize
classifier = GMM(n_components=2,covariance_type='full', init_params='wmc', n_iter=20)
# cluster initializing
X_train1 = X_train[np.where(y_train==0)[0], :]
X_train2 = X_train[np.where(y_train==1)[0], :]
cluster1 = KMeans(init='random', n_clusters=1, random_state=0).fit(X_train1)
cluster1 = cluster1.cluster_centers_
cluster2 = KMeans(init='random', n_clusters=1, random_state=0).fit(X_train2)
cluster2 = cluster2.cluster_centers_
clusters = np.concatenate((cluster1, cluster2), axis=0)
classifier.means_ = clusters
# Train the other parameters using the EM algorithm.
classifier.fit(X_train)
# predict
y_train_pred = classifier.predict(X_train)
train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
print "Keep {}% data.".format(train_accuracy)
# keep the data which are not outliers
y_train_pred = y_train_pred.reshape((y_train_pred.shape[0], 1))
X_train = X_train[np.where(y_train==y_train_pred)[0], :]
y_train_price = y_train_price[np.where(y_train==y_train_pred)[0], :]
y_train = y_train[np.where(y_train==y_train_pred)[0], :]
np.save('inputClf_GMMOutlierRemoval/X_train', X_train)
np.save('inputClf_GMMOutlierRemoval/y_train', y_train)
np.save('inputClf_GMMOutlierRemoval/y_train_price', y_train_price)
def zero_center_normalize(df, samples, logInput=False, method='median'):
'''
    Transform the input peptide abundance table to log2 scale and center it at zero.
    Inputs:
        df : dataframe of peptide abundances
        samples: column names of the selected samples
        logInput: whether the input abundances are already in log2 scale
        method: method for estimating the zero point ('median', 'average' or 'GMM')
'''
assert method in ('median', 'average', 'GMM'), \
'Zero centering method has to be among median, average or GMM!'
if not logInput:
# convert abundances to log2 scale
df[samples] = df[samples].apply(np.log2)
if method == 'average':
norm_scale = np.nanmean(df[samples], axis=0)
elif method == 'median':
norm_scale = np.nanmedian(df[samples], axis=0)
elif method == 'GMM':
''' two-component Gaussian mixture model '''
from sklearn.mixture import GMM
gmm = GMM(2)
norm_scale = []
for sp in samples:
v = df[sp].values
v = v[np.logical_not(np.isnan(v))]
v = v[np.logical_not(np.isinf(v))]
try:
                gmm.fit(v.reshape(-1, 1))
vmean = gmm.means_[np.argmin(gmm.covars_)]
norm_scale.append(vmean)
except:
norm_scale.append(np.nanmean(v))
norm_scale = np.array(norm_scale)
df[samples] = df[samples] - norm_scale
return df
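# Hedged usage sketch for zero_center_normalize on a tiny abundance table using the
# 'median' zero point. The column names and values are made up for illustration.
import numpy as np
import pandas as pd

df = pd.DataFrame({'peptide': ['p1', 'p2', 'p3'],
                   'sampleA': [100.0, 400.0, 1600.0],
                   'sampleB': [200.0, 800.0, 3200.0]})
df = zero_center_normalize(df, samples=['sampleA', 'sampleB'], method='median')
print(df[['sampleA', 'sampleB']])    # log2-transformed, median of each column at zero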
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
prng = check_random_state(prng)
g = GMM(n_mix, covariance_type=covariance_type)
g.means_ = prng.randint(-20, 20, (n_mix, n_features))
g.covars_ = make_covar_matrix(covariance_type, n_mix, n_features)
g.weights_ = normalized(prng.rand(n_mix))
return g
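# Hedged usage sketch for create_random_gmm. The helpers make_covar_matrix and normalized
# are assumed to come from the surrounding test utilities; the stand-ins below are
# illustrative guesses, not the project's originals, and cover the 'diag' case only.
import numpy as np

def normalized(x):
    # stand-in: scale a nonnegative vector so it sums to one
    return x / x.sum()

def make_covar_matrix(covariance_type, n_components, n_features):
    # stand-in: random positive per-dimension variances for 'diag' covariances
    assert covariance_type == 'diag'
    return (0.1 + 2 * np.random.rand(n_components, n_features)) ** 2

g = create_random_gmm(n_mix=3, n_features=4, covariance_type='diag', prng=0)
print(g.means_.shape)    # (3, 4); g.weights_ sums to 1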
def main():
pi = np.array([0.3, 0.5, 0.2])
mu = np.array([[1,1], [-1,-1], [-1,1]])*3
sigma = np.array([
[[1,0], [0,1]],
[[2,0], [0,2]],
[[0.5,0], [0, 0.5]],
])
X, C = generate_data(pi, mu, sigma, 1000)
plt.scatter(X[:,0], X[:,1], c=C, s=100, alpha=0.5)
plt.show()
# sklearn
gmm = GMM(n_components=3, covariance_type='full')
gmm.fit(X)
print "pi:", gmm.weights_
print "mu:", gmm.means_
print "sigma:", gmm.covars_
pi2, mu2, sigma2, L = expectation_maximization(X, len(pi))
print "pi:", pi2
print "mu:", mu2
print "sigma:", sigma2
plt.plot(L)
plt.show()
def show_fit(filename, ax=None):
"""Util function. Show a gaussian fit on nearest distances.
Usage example:
np.mean([show_fit(f) for f in list_naive_npy])
"""
from sklearn.mixture import GMM
X = np.load("{}".format(filename))
dist2nearest = np.array(X).reshape(-1, 1)
if dist2nearest.shape[0] < 2:
print("Cannot fit a Gaussian with two distances.")
return
dist2nearest_2 = -(np.array(sorted(dist2nearest)).reshape(-1, 1))
dist2nearest = np.array(list(dist2nearest_2) +
list(dist2nearest)).reshape(-1, 1)
gmm = GMM(n_components=3)
gmm.fit(dist2nearest)
plt.hist(dist2nearest, bins=50, normed=True)
linspace = np.linspace(-1, 1, 1000)[:, np.newaxis]
plt.plot(linspace, np.exp(gmm.score_samples(linspace)[0]), 'r')
lin = np.linspace(0, 1, 10000)[:, np.newaxis]
pred = gmm.predict(linspace)
argmax = np.argmax(gmm.means_)
idx = np.min(np.where(pred == argmax)[0])
plt.axvline(x=lin[idx], linestyle='--', color='r')
plt.show()
return lin[idx] # threshold
def _gaussian_fit(array):
if array.shape[0] < 2:
logging.error("Cannot fit a Gaussian with two distances.")
return 0
array_2 = -(np.array(sorted(array)).reshape(-1, 1))
array = np.array(list(array_2) + list(array)).reshape(-1, 1)
try:
# new sklearn GMM
gmm = mixture.GaussianMixture(n_components=3,
covariance_type='diag')
gmm.fit(array)
# gmmmean = np.max(gmm.means_)
# gmmsigma = gmm.covariances_[np.argmax(gmm.means_)]
except AttributeError:
# use old sklearn method
gmm = mixture.GMM(n_components=3)
gmm.fit(array)
# gmmmean = np.max(gmm.means_)
# gmmsigma = gmm.covars_[np.argmax(gmm.means_)]
# Extract optimal threshold
plt.hist(array, bins=50, normed=True) # debug, print
lin = np.linspace(0, 1, 10000)[:, np.newaxis]
# plt.plot(lin, np.exp(gmm.score_samples(lin)[0]), 'r')
pred = gmm.predict(lin)
try:
idx = np.min(np.where(pred == np.argmax(gmm.means_))[0])
except ValueError:
# print("Error", np.unique(pred))
idx = 0
plt.axvline(x=lin[idx], linestyle='--', color='r')
# plt.gcf().savefig("threshold_naive{}.png".format(k))
plt.close()
threshold = lin[idx][0] # threshold
# np.save("threshold_naive", threshold)
return threshold
def trainGMM(features, nComp, covType = 'diag'):
Gs = []
for f in features:
gmm = mixture.GMM(n_components=int(nComp), covariance_type = covType)
Gs.append(gmm.fit(f))
return Gs
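# Hedged usage sketch for trainGMM: fit one diagonal-covariance GMM per class feature
# matrix, assuming the legacy sklearn.mixture module imported as `mixture` above.
import numpy as np

features = [np.random.randn(200, 13) + i for i in range(3)]    # three MFCC-like feature sets
models = trainGMM(features, nComp=4, covType='diag')
print([m.means_.shape for m in models])    # [(4, 13), (4, 13), (4, 13)]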
def encodeGMM(method, gmm, data,
normalize=['ssr', 'l2g'], relevance=28, update='wmc',
posteriors=None):
"""
Encoding scheme adapting data from a GMM
parameters:
        method: the encoding method to apply; currently one of
            'supervector', 'fisher', 'vlad'
        gmm: the background GMM (= vocabulary = universal background model)
        data: typically the features, in row-wise order
normalize: global and local normalization schemes
relevance: (gmm-supervector-specific): relevance factor for mixing
update: (gmm-supervector-specific): which parts to use for mixing
posteriors: if given, they won't be computed again
returns: encoded vector
"""
    if posteriors is None:
# compute posteriors from gmm
posteriors = getPosteriors(gmm, data)
if 'supervector' in method:
enc = supervector(gmm, data, posteriors, relevance=relevance,
update=update)
elif 'fisher' in method:
enc = fisherEncode(gmm, data,
posteriors, normalize,
fv_components=update)
elif 'vlad' in method:
enc = vlad(data, gmm.means_, posteriors, gmm.means_.shape[0],
normalize )
else:
raise ValueError('unknown encoding method {}'.format(method))
norm_enc = normalizeEnc(enc, normalize)
return norm_enc
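# Hedged usage sketch for encodeGMM: Fisher-vector encoding of local descriptors against a
# background GMM. It assumes a legacy sklearn.mixture.GMM and that the helpers referenced
# above (getPosteriors, fisherEncode, supervector, vlad, normalizeEnc) live in this module.
import numpy as np
from sklearn.mixture import GMM

ubm = GMM(n_components=16, covariance_type='diag')
ubm.fit(np.random.randn(5000, 64))          # background model = vocabulary
descriptors = np.random.randn(300, 64)      # features of one sample, row-wise
enc = encodeGMM('fisher', ubm, descriptors, normalize=['ssr', 'l2g'], update='wmc')
print(enc.shape)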
def fitGMM(data, num_clusters, iterations, update, covar_type):
print ('fit gmm with num_clusters {}, params {}, n_iter {}, covar_type '
'{}'.format(num_clusters, update, iterations, covar_type))
gmm = mixture.GMM(num_clusters, n_iter=iterations,
params=update, covariance_type=covar_type)
gmm.fit(data)
return gmm
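# Hedged usage sketch for fitGMM, assuming `from sklearn import mixture` with a legacy
# scikit-learn (< 0.20) that still provides mixture.GMM.
import numpy as np

data = np.random.randn(2000, 64)
ubm = fitGMM(data, num_clusters=32, iterations=100, update='wmc', covar_type='diag')
print(ubm.means_.shape)    # (32, 64)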
# Source: methods.py, project South-African-Heart-Disease-data-analysis-using-python (author: khushi4tiwari)
def gmm(X,y,M,C,K=4,cov_type='diag',reps=10):
# Fit Gaussian mixture model
gmm = GMM(n_components=K, covariance_type=cov_type, n_init=reps, params='wmc').fit(X)
cls = gmm.predict(X) # extract cluster labels
cds = gmm.means_ # extract cluster centroids (means of gaussians)
covs = gmm.covars_ # extract cluster shapes (covariances of gaussians)
if cov_type == 'diag':
new_covs = np.zeros([K,M,M])
count = 0
for elem in covs:
temp_m = np.zeros([M,M])
for i in range(len(elem)):
temp_m[i][i] = elem[i]
new_covs[count] = temp_m
count += 1
covs = new_covs
clusterPlot(X,cls,K,C,y,cds,covs)
# Source: methods.py, project South-African-Heart-Disease-data-analysis-using-python (author: khushi4tiwari)
def CVK(X,KRange,covar_type='diag',reps=10):
N, M = X.shape
T = len(KRange)
CVE = np.zeros((T,1))
# K-fold crossvalidation
CV = cross_validation.KFold(N,5,shuffle=True)
for t,K in enumerate(KRange):
print('Fitting model for K={0}\n'.format(K))
# Fit Gaussian mixture model
gmm = GMM(n_components=K, covariance_type=covar_type, n_init=reps, params='wmc').fit(X)
# For each crossvalidation fold
for train_index, test_index in CV:
# extract training and test set for current CV fold
X_train = X[train_index]
X_test = X[test_index]
# Fit Gaussian mixture model to X_train
gmm = GMM(n_components=K, covariance_type=covar_type, n_init=reps, params='wmc').fit(X_train)
# compute negative log likelihood of X_test
CVE[t] += -gmm.score(X_test).sum()
#print CVE[t]
# Plot results
return CVE
#figure(); hold(True)
#plot(KRange, 2*CVE)
#legend(['Crossvalidation'])
#xlabel('K')
#show()
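# Hedged usage sketch for CVK: choose the number of mixture components by cross-validated
# negative log-likelihood. It assumes the legacy GMM and cross_validation imports used above.
import numpy as np

X = np.vstack([np.random.randn(100, 2) + 4, np.random.randn(100, 2) - 4])
KRange = range(1, 6)
CVE = CVK(X, KRange, covar_type='diag', reps=3)
best_K = KRange[int(np.argmin(CVE))]
print('Best K by cross-validation: {0}'.format(best_K))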
def Inner_GMM(SingleGenreSongs,numOfClusters):
means = [item.mean_timbreVec for item in SingleGenreSongs]
means_array = np.array(means)
model = mixture.GMM(n_components = numOfClusters,covariance_type = 'full',n_init = 10)
model.fit(means)
lbls = model.predict(means)
clusters_mean = [[] for i in range(numOfClusters)];
clusters_cov = [[] for i in range(numOfClusters)];
clusters_cnt = [0 for i in range(numOfClusters)];
ret_clusters_mean = [[] for i in range(numOfClusters)];
ret_clusters_cov = [[] for i in range(numOfClusters)];
assert(len(lbls) == len(SingleGenreSongs))
for i in range(len(lbls)):
lb = lbls[i]
clusters_mean[lb].append(np.array(SingleGenreSongs[i].mean_timbreVec))
clusters_cov[lb].append(np.array(SingleGenreSongs[i].cov_timbreVec))
clusters_cnt[lb] += 1
for i in range(numOfClusters):
# print type(clusters_mean[i][0])
ret_clusters_mean[i] = sum(clusters_mean[i])
ret_clusters_cov[i] = sum(clusters_cov[i])
# print type(ret_clusters_mean[i][0])
ret_clusters_mean[i] = [(item)/clusters_cnt[i] for item in ret_clusters_mean[i]]
ret_clusters_cov[i] = [(item)/clusters_cnt[i] for item in ret_clusters_cov[i]]
return ret_clusters_mean,ret_clusters_cov
# return model.means_,model.covars_
def dictionary(self, descriptors, N):
gmm = mixture.GMM(n_components=N, covariance_type='full')
gmm.fit(descriptors)
self.means = np.float32(gmm.means_)
self.covs = np.float32(gmm.covars_)
self.weights = np.float32(gmm.weights_)
return self.means, self.covs, self.weights
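# Hedged standalone sketch of the dictionary step above: estimate a full-covariance GMM
# vocabulary from stacked local descriptors and keep float32 copies of its parameters.
# Assumes a legacy scikit-learn (< 0.20); the descriptor array is illustrative.
import numpy as np
from sklearn import mixture

descriptors = np.random.rand(2000, 16).astype(np.float32)
gmm = mixture.GMM(n_components=8, covariance_type='full')
gmm.fit(descriptors)
means = np.float32(gmm.means_)        # (8, 16)
covs = np.float32(gmm.covars_)        # (8, 16, 16) for 'full' covariances
weights = np.float32(gmm.weights_)    # (8,)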
def test_train(self, params='wmc'):
g = mixture.GMM(n_components=self.n_components,
covariance_type=self.covariance_type)
g.weights_ = self.weights
g.means_ = self.means
g.covars_ = 20 * self.covars[self.covariance_type]
# Create a training set by sampling from the predefined distribution.
X = g.sample(n_samples=100)
g = self.model(n_components=self.n_components,
covariance_type=self.covariance_type,
random_state=rng, min_covar=1e-1,
n_iter=1, init_params=params)
g.fit(X)
# Do one training iteration at a time so we can keep track of
# the log likelihood to make sure that it increases after each
# iteration.
trainll = []
for _ in range(5):
g.params = params
g.init_params = ''
g.fit(X)
trainll.append(self.score(g, X))
g.n_iter = 10
g.init_params = ''
g.params = params
g.fit(X) # finish fitting
# Note that the log likelihood will sometimes decrease by a
# very small amount after it has more or less converged due to
# the addition of min_covar to the covariance (to prevent
# underflow). This is why the threshold is set to -0.5
# instead of 0.
delta_min = np.diff(trainll).min()
self.assertTrue(
delta_min > self.threshold,
"The min nll increase is %f which is lower than the admissible"
" threshold of %f, for model %s. The likelihoods are %s."
% (delta_min, self.threshold, self.covariance_type, trainll))
def test_multiple_init():
    # Test that using multiple inits does not do much worse than a single one
X = rng.randn(30, 5)
X[:10] += 2
g = mixture.GMM(n_components=2, covariance_type='spherical',
random_state=rng, min_covar=1e-7, n_iter=5)
train1 = g.fit(X).score(X).sum()
g.n_init = 5
train2 = g.fit(X).score(X).sum()
assert_true(train2 >= train1 - 1.e-2)
def test_n_parameters():
# Test that the right number of parameters is estimated
n_samples, n_dim, n_components = 7, 5, 2
X = rng.randn(n_samples, n_dim)
n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41}
for cv_type in ['full', 'tied', 'diag', 'spherical']:
g = mixture.GMM(n_components=n_components, covariance_type=cv_type,
random_state=rng, min_covar=1e-7, n_iter=1)
g.fit(X)
assert_true(g._n_parameters() == n_params[cv_type])
def test_1d_1component():
    # Test that all of the covariance_types return the same BIC score for
    # 1-dimensional, 1-component fits.
n_samples, n_dim, n_components = 100, 1, 1
X = rng.randn(n_samples, n_dim)
g_full = mixture.GMM(n_components=n_components, covariance_type='full',
random_state=rng, min_covar=1e-7, n_iter=1)
g_full.fit(X)
g_full_bic = g_full.bic(X)
for cv_type in ['tied', 'diag', 'spherical']:
g = mixture.GMM(n_components=n_components, covariance_type=cv_type,
random_state=rng, min_covar=1e-7, n_iter=1)
g.fit(X)
assert_array_almost_equal(g.bic(X), g_full_bic)
def test_verbose_first_level():
# Create sample data
X = rng.randn(30, 5)
X[:10] += 2
g = mixture.GMM(n_components=2, n_init=2, verbose=1)
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
g.fit(X)
finally:
sys.stdout = old_stdout