feature_weighting.py 文件源码-python代码片段

feature_weighting.py 文件源码

python

阅读 20 收藏 0 点赞 0 评论 0

项目：FreeDiscovery 作者: FreeDiscovery 项目源码文件源码

def fit_transform(self, X, y=None):
        """Apply document term weighting and normalization on text features

        Parameters
        ----------
        X : sparse matrix, [n_samples, n_features]
            a matrix of term/token counts
        """
        X = check_array(X, ['csr'], copy=self.copy)

        scheme_t, scheme_d, scheme_n = _validate_smart_notation(self.weighting)
        self.dl_ = _document_length(X)
        if scheme_d in 'stpd' or self.compute_df:
            self.df_ = _document_frequency(X)
        else:
            self.df_ = None
        if sp.isspmatrix_csr(X):
            self.du_ = np.diff(X.indptr)
        else:
            self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
        self._n_features = X.shape[1]

        if self.df_ is not None:
            df_n_samples = len(self.dl_)
        else:
            df_n_samples = None

        if self.df_ is not None:
            df_n_samples = len(self.dl_)
        else:
            df_n_samples = None

        X, self.norm_pivot = _smart_tfidf(X, self.weighting, self.df_,
                                          df_n_samples,
                                          norm_alpha=self.norm_alpha,
                                          norm_pivot=self.norm_pivot,
                                          return_pivot=True)
        return X