transformers.py 文件源码-python代码片段

transformers.py 文件源码

python

阅读 39 收藏 0 点赞 0 评论 0

项目：healthcareai-py 作者: HealthCatalyst 项目源码文件源码

def fit(self, X, y=None):
        """Learn the per-column fill values used for imputation.

        When ``self.impute`` is ``False`` nothing is learned and ``self`` is
        returned unchanged. Otherwise two attributes are set:

        * ``self.object_columns`` - names of the object-dtype columns of ``X``.
        * ``self.fill`` - a Series indexed by ``X.columns`` holding, for each
          column, the most frequent value (object, categorical and string
          columns) or the mean (every other column). A column with no
          non-missing values gets NaN.

        If ``self.verbose`` is truthy, the percentage of missing cells among
        the numeric columns is printed.

        Args:
            X: pandas DataFrame to learn the fill values from.
            y: Unused; accepted so the signature matches the scikit-learn
                ``fit(X, y)`` convention.

        Returns:
            self, for scikit compatibility.
        """
        # Return if not imputing
        if self.impute is False:
            return self

        # Grab list of object column names before doing imputation
        self.object_columns = X.select_dtypes(include=['object']).columns.values

        fill_values = []
        for c in X:
            column = X[c]
            dtype = column.dtype
            # Dtype-based check: pd.core.common.is_categorical_dtype is gone
            # from current pandas and made every non-object column fail.
            use_most_frequent = (
                dtype == np.dtype('O')
                or isinstance(dtype, (pd.CategoricalDtype, pd.StringDtype))
            )
            if use_most_frequent:
                # value_counts() drops missing values, so an all-missing
                # column yields an empty result; fall back to NaN instead of
                # raising IndexError.
                counts = column.value_counts()
                fill_values.append(counts.index[0] if len(counts) else np.nan)
            else:
                fill_values.append(column.mean())
        self.fill = pd.Series(fill_values, index=X.columns)

        if self.verbose:
            # NOTE(review): only numeric columns are counted here even though
            # fill values are learned for every column - confirm intent.
            numeric = X.select_dtypes(include=[np.number])
            num_nans = int(numeric.isnull().sum().sum())
            # Total cells, missing ones included: DataFrame.count() skips
            # missing values and therefore overstated the percentage.
            num_total = numeric.shape[0] * numeric.shape[1]
            if num_total:
                percentage_imputed = 100.0 * num_nans / num_total
            else:
                # No numeric cells at all: nothing to impute, avoid 0/0.
                percentage_imputed = 0.0
            print("Percentage Imputed: %.2f%%" % percentage_imputed)
            print("Note: Impute will always happen on prediction dataframe, otherwise rows are dropped, and will lead "
                  "to missing predictions")

        # return self for scikit compatibility
        return self