def print_topic_summary(self, df, topic_num, num_words=20):
    '''
    Print a summary of a topic from NMF clustering.
    INPUT:
    df: pandas DataFrame that NMF clustering was run on
    topic_num: index of topic from clustering
    num_words: top n words to print in summary
    '''
    num_reviews = self.labels[:, topic_num].sum()
    if not num_reviews:
        return None
    print('Summary of Topic {}:'.format(topic_num))
    print('Number of reviews in topic: {}'.format(num_reviews))
    print('Top {} words in topic:'.format(num_words))
    print(self.top_words_by_topic(num_words, topic_num))
def new(n_feature=128):
vectorizer = CountVectorizer(
encoding='utf-8',
ngram_range=(1,1), # Unigram only
max_features=n_feature,
binary=True
)
# Fill the gap (missing expected tags)
# ---
# Hypothesis: Some tags are somehow related so
# we smoothen the missing values with matrix factorisation.
smoother = NMF(n_components=n_feature)
# Binarise the vector's individual values
binariser = Binarizer(copy=True)
# Count vectoriser => NMF as smoother => Binariser
print(colored('Taghasher model created','yellow'))
return [vectorizer,smoother,binariser]
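A minimal usage sketch for the three-stage pipeline returned above; `docs`, a list of tag strings, is an assumption:

# Hedged sketch: `docs` (a list of tag strings) is an assumption.
vectorizer, smoother, binariser = new(n_feature=128)
V = vectorizer.fit_transform(docs).toarray()      # binary tag indicators
W = smoother.fit_transform(V)                     # factorise V ~ W @ H
V_smoothed = W.dot(smoother.components_)          # reconstruct to fill gaps
V_binary = binariser.fit_transform(V_smoothed)    # back to 0/1 values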
def test_nmf(eng):
t = linspace(0, 10, 100)
s1 = 1 + absolute(sin(t))
s2 = 1 + square(cos(2*t))
h = c_[s1, s2].T
w = array([[1, 0], [0, 1], [1, 1]])
x = dot(w, h)
x = fromarray(x, engine=eng)
from sklearn.decomposition import NMF as skNMF
nmf = skNMF(n_components=2, random_state=0)
w1 = nmf.fit_transform(x.toarray())
h1 = nmf.components_
xhat1 = dot(w1, h1)
w2, h2 = NMF(k=2, seed=0).fit(x)
xhat2 = dot(w2, h2)
    tol = 1e-1
assert allclose(xhat1, xhat2, atol=tol)
def run_nmf(alignment, num_clusters, alpha, mixing):
model = NMF(n_components = num_clusters, init = 'nndsvd', alpha = alpha, l1_ratio = mixing, verbose = 0)
    return model.fit_transform(alignment)
def fit_nmf(self, df):
'''
Function to run NMF clustering on dataframe
INPUT:
df: pandas Dataframe containing 'lemmatized_text' column for TF-IDF
'''
self.optimize_nmf(df)
self.nmf = NMF(n_components=self.optimum_topics, alpha=self.nmf_alpha,
l1_ratio=self.nmf_l1_ratio, random_state=self.random_state).fit(self.tfidf_matrix)
self.W_matrix = self.nmf.transform(self.tfidf_matrix)
sums = self.W_matrix.sum(axis=1)
self.W_pct = self.W_matrix / sums[:, None]
self.labels = self.W_pct >= 0.20
print "Reconstruction Error: {}".format(self.nmf.reconstruction_err_)
def plot_topic(self, topic_idx):
'''
Function to plot a wordcloud based on a topic
INPUT:
topic_idx: index of topic from NMF clustering
'''
    title = input('Enter a title for this plot: ')
num_reviews = self.labels[:, topic_idx].sum()
word_freq = self.topic_word_frequency(topic_idx)
wc = WordCloud(width=2000, height=1000, max_words=150,
background_color='white')
wc.fit_words(word_freq)
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(111)
ax.set_title('Topic {}: {}\nNumber of Reviews in Topic: {}'.format(
topic_idx, title, num_reviews), fontsize=24)
ax.axis('off')
ax.imshow(wc)
name = 'topic_' + str(topic_idx) + '.png'
if self.pro_or_con == 'pro':
img_path = os.path.join('images', 'positive')
else:
img_path = os.path.join('images', 'negative')
plt.savefig(os.path.join(img_path, name))
plt.show()
def visualize_topics(self, df):
'''
Function to cycle through all topics and print summary and plot cloud
INPUT:
df: pandas DataFrame (source for NMF text)
'''
for i in range(self.optimum_topics):
self.print_topic_summary(df, i)
self.plot_topic(i)
        print()
def build_model(self, baskets, use_probabilities=False):
# print 'build V'
self.__buildV(baskets, use_probabilities)
# print 'density', 1.0 * len(self.V.nonzero()[0]) / (self.V.shape[0] * self.V.shape[1])
sknmf = SKNMF(n_components=self.n_factor, init='random', solver='cd', tol=self.tol, max_iter=self.max_iter,
alpha=self.alpha, l1_ratio=self.l1_ratio, beta=self.beta)
self.W = sknmf.fit_transform(self.V)
self.H = sknmf.components_
self.R = np.dot(self.W, self.H)
self.__state = 'built'
return self
def apply( self, X, k = 2, init_W = None, init_H = None ):
"""
Apply NMF to the specified document-term matrix X.
"""
self.W = None
self.H = None
random_seed = np.random.randint( 1, 100000 )
if not (init_W is None or init_H is None):
model = decomposition.NMF( init="custom", n_components=k, max_iter=self.max_iters, random_state = random_seed )
self.W = model.fit_transform( X, W=init_W, H=init_H )
else:
model = decomposition.NMF( init=self.init_strategy, n_components=k, max_iter=self.max_iters, random_state = random_seed )
self.W = model.fit_transform( X )
self.H = model.components_
def rank_terms( self, topic_index, top = -1 ):
"""
Return the top ranked terms for the specified topic, generated during the last NMF run.
"""
if self.H is None:
raise ValueError("No results for previous run available")
# NB: reverse
top_indices = np.argsort( self.H[topic_index,:] )[::-1]
# truncate if necessary
if top < 1 or top > len(top_indices):
return top_indices
return top_indices[0:top]
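A hedged usage sketch for apply() and rank_terms(); the instance name `model`, the document-term matrix `X`, and the vocabulary list `terms` are assumptions:

model.apply(X, k=5)
for topic_index in range(5):
    top = model.rank_terms(topic_index, top=10)
    print("Topic {}: {}".format(topic_index, ", ".join(terms[i] for i in top)))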
def plot_nmf(data, analyse = True, n_components = 2):
"""Perform NMF and plot overview of the results"""
if analyse:
nmf = sd.NMF(n_components=n_components, init = 'nndsvdar', random_state = 0, solver = 'cd')
Y = nmf.fit_transform(data)
else:
Y = data;
nmf = None;
if n_components is None:
n_components = 3;
if n_components == 1:
plt.subplot(1,3,1);
plt.plot(Y);
elif n_components == 2:
plt.subplot(1,3,1);
plt.scatter(Y[:,0], Y[:,1], c = range(len(Y[:,0])), cmap = plt.cm.Spectral);
else:
ax = plt.gcf().add_subplot(1,3,1, projection = '3d');
ax.scatter(Y[:, 0], Y[:, 1], Y[:,2], c = range(len(Y[:,0])), cmap=plt.cm.Spectral)
plt.title("nmf")
if nmf is not None:
feat = nmf.components_;
plt.subplot(1,3,2);
plt.imshow(feat, interpolation = 'none', aspect = 'auto', cmap = 'viridis')
plt.colorbar(pad = 0.01,fraction = 0.01)
plt.title('features');
plt.subplot(1,3,3);
plt.imshow(Y, interpolation = 'none', aspect = 'auto', cmap = 'viridis')
plt.colorbar(pad = 0.01,fraction = 0.01)
plt.title('amplitudes');
plt.tight_layout();
def finetune(Y, cin, nIter=5):
    """Fine tuning of components within greedyROI using rank-1 NMF:
    alternate non-negative updates of the spatial footprint and the
    temporal trace.
    """
    for _ in range(nIter):
        a = np.maximum(np.dot(Y, cin), 0)
        a = a / np.sqrt(np.sum(a**2))
        cin = np.sum(Y * a[..., np.newaxis], tuple(np.arange(Y.ndim - 1)))
    return a, cin
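A small worked example for the routine above; the (height, width, frames) shape is an assumption:

import numpy as np
Y = np.random.rand(10, 10, 50)      # toy movie patch: 10x10 pixels, 50 frames
cin = np.random.rand(50)            # initial temporal trace
a, c = finetune(Y, cin, nIter=5)    # a: (10, 10) footprint, c: (50,) trace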
def NMF_feature_extraction(text_lst, n_samples, n_features, n_topics, n_top_words):
    print("Extracting tf-idf features for NMF...")
    tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words='english')
    tfidf = tfidf_vectorizer.fit_transform(text_lst)
    print("Fitting the NMF model with tf-idf features, "
          "n_samples=%d and n_features=%d..." % (n_samples, n_features))
    nmf = NMF(n_components=n_topics, random_state=1, alpha=.1, l1_ratio=.5).fit(tfidf)
    print("\nTopics in NMF model:")
    tfidf_feature_names = tfidf_vectorizer.get_feature_names()
    print_top_words(nmf, tfidf_feature_names, n_top_words)
    print("*************end NMF****************")
def NMF_results(data, n_comps=None):
nmf = NMF(n_components=n_comps)
model = nmf.fit(data)
out_data = {'model' : model, 'reconstruction error': nmf.reconstruction_err_ }
return 'NMF', out_data
def nmf(X, n_components=None):
'''
Non Negative Matrix Factorization.
Outputs the weights (W) matrix and the components.
'''
model = NMF(n_components)
W = model.fit_transform(X)
components = model.components_
return W, components
def nmf(X, n_components=None):
model = NMF(n_components)
W = model.fit_transform(X)
components = model.components_
return W, components
def build_analyzer(self):
analyzer = super(TfidfVectorizer, self).build_analyzer()
return lambda doc: (no_plural_stemmer(w) for w in analyzer(doc))
# We use a few heuristics to filter out useless terms early on: the posts
# are stripped of headers, footers and quoted replies; common English
# words, words occurring in only one document, and words occurring in at
# least 95% of the documents are removed.
# Use tf-idf features for NMF.
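A minimal sketch of the filtering described above, assuming the standard 20-newsgroups loader (the stemming analyzer subclass is omitted for brevity):

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer

posts = fetch_20newsgroups(remove=('headers', 'footers', 'quotes')).data
vectorizer = TfidfVectorizer(max_df=0.95,         # drop terms in >=95% of docs
                             min_df=2,            # drop terms in a single doc
                             stop_words='english')
tfidf = vectorizer.fit_transform(posts)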
def nmf_accuracy():
tdm = pickle.load(open(DATASET_PATH + "BOW.p", "rb"))
true_labels = pickle.load(open(OUTFILE_STANCE, "rb"))[0]
print("I'm NNMF-ing!")
NNMF = NMF(max_iter=50, n_components=100)
tdm_reshaped = NNMF.fit_transform(tdm)
print("I'm clustering!")
cluster_kmeans(tdm_reshaped, true_labels)
def build_nmf(X, k=5):
mod = NMF(n_components=k)
W = mod.fit_transform(X)
H = mod.components_
return W, H
def build_nmf_all(X, k=5):
scaler = MinMaxScaler()
X_sca = scaler.fit_transform(X)
nmfModel = NMF(n_components=k)
W = nmfModel.fit_transform(X_sca)
H = nmfModel.components_
    print('NMF done!')
# plot_heatmap(H.T, k=k)
labelsNMF = W.argmax(axis=1)
return W, H, labelsNMF, nmfModel
def nmf_test(df):
    X = df.drop(['Year', 'zipcode'], axis=1).values
    scaler = MinMaxScaler()
    X_sca = scaler.fit_transform(X)
    scores = []
    for k in range(2, 11):
        model = NMF(n_components=k)
        W = model.fit_transform(X_sca)
        labels = W.argmax(axis=1)
        score = silhouette_score(X_sca, labels)
        scores.append(score)
    plt.plot(range(2, 11), scores, 'b*-')
    plt.show()
# Source: generate_NMF_6_feature.py, project kaggle-quora-solution-8th (author: qqgeogor)
def svd(train, test, dims=6, it=15, file_name='tf_idf', path='data/'):
    # NB: despite its name, this helper factorises with NMF, not SVD.
    svd = NMF(random_state=1123, n_components=dims)
    svd.fit(train)
    # print svd.transform(train).shape
    pd.to_pickle(svd.transform(train), path + 'train_NMF_' + str(dims) + '_' + file_name + '.pkl')
    pd.to_pickle(svd.transform(test), path + 'test_NMF_' + str(dims) + '_' + file_name + '.pkl')
    return 'Success'
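With the default arguments, the pickles written above can be read back with pandas (paths follow directly from the defaults):

train_nmf = pd.read_pickle('data/train_NMF_6_tf_idf.pkl')
test_nmf = pd.read_pickle('data/test_NMF_6_tf_idf.pkl')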
def check_transformer_data_not_an_array(name, Transformer):
X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
random_state=0, n_features=2, cluster_std=0.1)
X = StandardScaler().fit_transform(X)
# We need to make sure that we have non negative data, for things
# like NMF
X -= X.min() - .1
this_X = NotAnArray(X)
this_y = NotAnArray(np.asarray(y))
_check_transformer(name, Transformer, this_X, this_y)
def check_classifiers_classes(name, Classifier):
X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
X, y = shuffle(X, y, random_state=7)
X = StandardScaler().fit_transform(X)
# We need to make sure that we have non negative data, for things
# like NMF
X -= X.min() - .1
y_names = np.array(["one", "two", "three"])[y]
for y_names in [y_names, y_names.astype('O')]:
if name in ["LabelPropagation", "LabelSpreading"]:
# TODO some complication with -1 label
y_ = y
else:
y_ = y_names
classes = np.unique(y_)
# catch deprecation warnings
with warnings.catch_warnings(record=True):
classifier = Classifier()
if name == 'BernoulliNB':
classifier.set_params(binarize=X.mean())
set_testing_parameters(classifier)
set_random_state(classifier)
# fit
classifier.fit(X, y_)
y_pred = classifier.predict(X)
# training set performance
assert_array_equal(np.unique(y_), np.unique(y_pred))
if np.any(classifier.classes_ != classes):
print("Unexpected classes_ attribute for %r: "
"expected %s, got %s" %
(classifier, classes, classifier.classes_))
def _fit_local(self, data):
from sklearn.decomposition import NMF
nmf = NMF(n_components=self.k, tol=self.tol, max_iter=self.max_iter, random_state=self.seed)
w = nmf.fit_transform(data)
    return w, nmf.components_
def optimize_nmf(self, df):
'''
Function to optimize the number of topics used in NMF clustering.
INPUT:
df: pandas Dataframe containing 'lemmatized_text' column for TF-IDF
'''
self.fit_tfidf(df)
if not self.optimum_topics:
avg_cosine_sim = []
pbar = ProgressBar()
for i in pbar(self.num_topics):
cosine_sim = []
self.nmf = NMF(n_components=i,
alpha=self.nmf_alpha,
l1_ratio=self.nmf_l1_ratio,
random_state=self.random_state).fit(self.tfidf_matrix)
err = self.nmf.reconstruction_err_
self.H_matrix = self.nmf.components_
if i == 1:
avg_cosine_sim.append(1)
else:
                idx_arr = np.arange(i)
                for combo in combinations(idx_arr, 2):
                    # topics are the rows of H (shape: n_topics x n_terms)
                    vect_1 = self.H_matrix[int(combo[0]), :].reshape(1, -1)
                    vect_2 = self.H_matrix[int(combo[1]), :].reshape(1, -1)
                    sim = cosine_similarity(vect_1, vect_2)[0, 0]
                    cosine_sim.append(sim)
                avg_cosine_sim.append(np.mean(cosine_sim))
self.reconstruction_err_array.append(err)
fig = plt.figure(figsize=(16, 8))
ax_1 = fig.add_subplot(211)
ax_1.plot(self.num_topics, self.reconstruction_err_array)
ax_1.set_title("Reconstruction Error vs Number of Topics")
ax_1.set_xlabel("Number of Topics")
ax_1.set_ylabel("Reconstruction Error")
ax_2 = fig.add_subplot(212)
ax_2.plot(self.num_topics, avg_cosine_sim)
ax_2.set_title("Avg Cosine Similarity Between Topics")
ax_2.set_xlabel("Number of Topics")
ax_2.set_ylabel("Avg Cosine Similarity")
plt.tight_layout()
if self.pro_or_con == 'pro':
img_path = os.path.join('images', 'positive')
else:
img_path = os.path.join('images', 'negative')
plt.savefig(os.path.join(img_path, "nmf_metrics.png"))
plt.show()
        self.optimum_topics = int(input("Desired topics from graph: "))
def main():
r_pre = "[your file path]/all_purpose"
f_path = "[your file path]/all_purpose_export.txt"
    p1 = r_pre + r"\.csv\t\d+\t(.*?)(\t\d+){6}"
    p2 = r"(.*?)O\s*\t(.*?)"
extracted_combo_dct = {}
stemmed_extracted_combo_dct = {}
extracted_combo_lst = []
stemmed_extracted_combo_lst = []
n_top_words = 3
n_topics = 20
n_features = 50
f = open(f_path)
for l in f:
r1 = re.search(p1, l)
m1 = ' '.join(r1.group(1).split('\t'))
r2 = re.search(p2, l)
        if r2 is None:
            print(l)
            break  # used to add missing " O"
m2 = ' '.join([e for e in l.split(r2.group(1))[1].split('O')[1].split('\t') if e != ' ']).split('\n')[0]
extracted_combo_dct.setdefault(m1, 0)
stemmed_extracted_combo_dct.setdefault(m2, 0)
extracted_combo_dct[m1] += 1
stemmed_extracted_combo_dct[m2] += 1
extracted_combo_lst.append(m1)
stemmed_extracted_combo_lst.append(m2)
sort_dct_by_value(extracted_combo_dct)
sort_dct_by_value(stemmed_extracted_combo_dct)
n_samples = len(extracted_combo_lst)
n_stemmed_samples = len(stemmed_extracted_combo_lst)
# using NMF feature extraction
NMF_feature_extraction(extracted_combo_lst, n_samples, n_features, n_topics, n_top_words)
NMF_feature_extraction(stemmed_extracted_combo_lst, n_stemmed_samples, n_features, n_topics, n_top_words)
# using LDA feature extraction
LDA_feature_extraction(extracted_combo_lst, n_samples, n_features, n_topics, n_top_words)
LDA_feature_extraction(stemmed_extracted_combo_lst, n_stemmed_samples, n_features, n_topics, n_top_words)
def evaluate(self, matrix):
"""
Args:
matrix (2d array): this is the matrix of documents and tokens
where the number of topics needs to be determined, this has worked
with compressed sparse row matrices before
Returns
topic_count (int): this is the number of topics that IPNMF was
able to pick up heuristically
"""
if self.noise_pct == 'auto':
self._pareto_corpus_content(matrix, .8)
if self.step == 'auto':
self._determine_auto_step_size(matrix)
if self.pnmf_verbose:
print('initializing evaluation...')
self.corpus_count = matrix.shape[0]
self.rich_content = int(self.corpus_count * (1-self.noise_pct))
self.noise_content = self.corpus_count - self.rich_content
topic_array = np.arange(self.start, self.max_steps * self.step +
self.start, self.step)
for topic_count in topic_array:
if self.pnmf_verbose:
print('extracting {} topics...'.format(topic_count))
self.topic_count = topic_count
nmf = NMF(n_components=self.topic_count, init=self.init,
solver=self.solver, tol=self.tol, max_iter=self.max_iter,
random_state=self.random_state, alpha=self.alpha,
l1_ratio=self.l1_ratio, verbose=self.verbose,
shuffle=self.shuffle, nls_max_iter=self.nls_max_iter,
sparseness=self.sparseness, beta=self.beta,
eta=self.eta)
W = nmf.fit_transform(matrix)
self.nmf = nmf
self.topic_labels = np.apply_along_axis(func1d=np.argmax,
axis=1, arr=W)
self.topic_summary = Counter(self.topic_labels)
if self._stopping_condition():
if self.pnmf_verbose:
print('heuristic topic count is {}'
.format(self.topic_count - self.step))
self.topic_count = self.topic_count - self.step
nmf = NMF(n_components=self.topic_count, init=self.init,
solver=self.solver, tol=self.tol,
max_iter=self.max_iter,
random_state=self.random_state, alpha=self.alpha,
l1_ratio=self.l1_ratio, verbose=self.verbose,
shuffle=self.shuffle,
nls_max_iter=self.nls_max_iter,
sparseness=self.sparseness, beta=self.beta,
eta=self.eta)
nmf.fit(matrix)
self.nmf = self.previous_nmf
return self.topic_count
else:
self.previous_nmf = nmf
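A hedged driver for evaluate(); the instance name `ipnmf` and the CSR matrix `dtm` are assumptions, since only the method contract is shown above:

n_topics = ipnmf.evaluate(dtm)
print('heuristic topic count: {}'.format(n_topics))
print(ipnmf.topic_summary)    # Counter of documents per topic label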
def set_testing_parameters(estimator):
# set parameters to speed up some estimators and
# avoid deprecated behaviour
params = estimator.get_params()
if ("n_iter" in params
and estimator.__class__.__name__ != "TSNE"):
estimator.set_params(n_iter=5)
if "max_iter" in params:
warnings.simplefilter("ignore", ConvergenceWarning)
if estimator.max_iter is not None:
estimator.set_params(max_iter=min(5, estimator.max_iter))
# LinearSVR
if estimator.__class__.__name__ == 'LinearSVR':
estimator.set_params(max_iter=20)
# NMF
if estimator.__class__.__name__ == 'NMF':
estimator.set_params(max_iter=100)
# MLP
if estimator.__class__.__name__ in ['MLPClassifier', 'MLPRegressor']:
estimator.set_params(max_iter=100)
if "n_resampling" in params:
# randomized lasso
estimator.set_params(n_resampling=5)
if "n_estimators" in params:
# especially gradient boosting with default 100
estimator.set_params(n_estimators=min(5, estimator.n_estimators))
if "max_trials" in params:
# RANSAC
estimator.set_params(max_trials=10)
if "n_init" in params:
# K-Means
estimator.set_params(n_init=2)
if "decision_function_shape" in params:
# SVC
estimator.set_params(decision_function_shape='ovo')
if estimator.__class__.__name__ == "SelectFdr":
# be tolerant of noisy datasets (not actually speed)
estimator.set_params(alpha=.5)
if estimator.__class__.__name__ == "TheilSenRegressor":
estimator.max_subpopulation = 100
if isinstance(estimator, BaseRandomProjection):
# Due to the jl lemma and often very few samples, the number
# of components of the random matrix projection will be probably
# greater than the number of features.
# So we impose a smaller number (avoid "auto" mode)
estimator.set_params(n_components=1)
if isinstance(estimator, SelectKBest):
# SelectKBest has a default of k=10
# which is more feature than we have in most case.
estimator.set_params(k=1)
if isinstance(estimator, NMF):
if not isinstance(estimator, ProjectedGradientNMF):
estimator.set_params(solver='cd')