paretonmf.py 文件源码-python代码片段

def evaluate(self, matrix):
        """
        Args:
            matrix (2d array): this is the matrix of documents and tokens
            where the number of topics needs to be determined, this has worked
            with compressed sparse row matrices before
        Returns
            topic_count (int): this is the number of topics that IPNMF was
            able to pick up heuristically
        """
        if self.noise_pct == 'auto':
            self._pareto_corpus_content(matrix, .8)
        if self.step == 'auto':
            self._determine_auto_step_size(matrix)
        if self.pnmf_verbose:
            print('initializing evaluation...')
        self.corpus_count = matrix.shape[0]
        self.rich_content = int(self.corpus_count * (1-self.noise_pct))
        self.noise_content = self.corpus_count - self.rich_content
        topic_array = np.arange(self.start, self.max_steps * self.step +
                                self.start, self.step)
        for topic_count in topic_array:
            if self.pnmf_verbose:
                print('extracting {} topics...'.format(topic_count))
            self.topic_count = topic_count
            nmf = NMF(n_components=self.topic_count, init=self.init,
                      solver=self.solver, tol=self.tol, max_iter=self.max_iter,
                      random_state=self.random_state, alpha=self.alpha,
                      l1_ratio=self.l1_ratio, verbose=self.verbose,
                      shuffle=self.shuffle, nls_max_iter=self.nls_max_iter,
                      sparseness=self.sparseness, beta=self.beta,
                      eta=self.eta)
            W = nmf.fit_transform(matrix)
            self.nmf = nmf
            self.topic_labels = np.apply_along_axis(func1d=np.argmax,
                                                    axis=1, arr=W)
            self.topic_summary = Counter(self.topic_labels)
            if self._stopping_condition():
                if self.pnmf_verbose:
                    print('heuristic topic count is {}'
                          .format(self.topic_count - self.step))
                self.topic_count = self.topic_count - self.step
                nmf = NMF(n_components=self.topic_count, init=self.init,
                          solver=self.solver, tol=self.tol,
                          max_iter=self.max_iter,
                          random_state=self.random_state, alpha=self.alpha,
                          l1_ratio=self.l1_ratio, verbose=self.verbose,
                          shuffle=self.shuffle,
                          nls_max_iter=self.nls_max_iter,
                          sparseness=self.sparseness, beta=self.beta,
                          eta=self.eta)
                nmf.fit(matrix)
                self.nmf = self.previous_nmf
                return self.topic_count
            else:
                self.previous_nmf = nmf