feature_weighting.py 文件源码-python代码片段

feature_weighting.py 文件源码

python

阅读 22 收藏 0 点赞 0 评论 0

项目：FreeDiscovery 作者: FreeDiscovery 项目源码文件源码

def fit(self, X, y=None):
        """Learn the document length and, when required, document frequency.

        Parameters
        ----------
        X : sparse matrix, [n_samples, n_features]
            a matrix of term/token counts
        y : ignored
            not used by this method

        Returns
        -------
        self
            this estimator, with ``dl_``, ``df_``, ``du_`` and
            ``_n_features`` set

        Notes
        -----
        When the normalization part of ``self.weighting`` ends with ``'p'``
        and ``self.norm_pivot`` is None, ``self.norm_pivot`` is overwritten
        with the pivot returned by ``_smart_tfidf``.
        """
        X = check_array(X, ['csr'], copy=self.copy)
        _, doc_scheme, norm_scheme = _validate_smart_notation(self.weighting)

        self.dl_ = _document_length(X)

        # Document frequencies are computed only when the document scheme is
        # contained in 'stp' or when explicitly requested via compute_df.
        needs_df = doc_scheme in 'stp' or self.compute_df
        self.df_ = _document_frequency(X) if needs_df else None

        # Per-row entry count: stored entries for CSR input, otherwise the
        # number of elements that are not equal to zero.
        if not sp.isspmatrix_csr(X):
            self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
        else:
            self.du_ = np.diff(X.indptr)
        self._n_features = X.shape[1]

        df_n_samples = None if self.df_ is None else len(self.dl_)

        # Nothing more to do unless a pivoted normalization was requested
        # without an explicit pivot value.
        if not norm_scheme.endswith('p') or self.norm_pivot is not None:
            return self

        # No pivot was provided: obtain it from the training data.
        _, pivot = _smart_tfidf(X, self.weighting, self.df_, df_n_samples,
                                norm_alpha=self.norm_alpha,
                                norm_pivot=self.norm_pivot,
                                return_pivot=True)
        self.norm_pivot = pivot
        return self