def fit(self, X, y=None):
# Return if not imputing
if self.impute is False:
return self
# Grab list of object column names before doing imputation
self.object_columns = X.select_dtypes(include=['object']).columns.values
self.fill = pd.Series([X[c].value_counts().index[0]
if X[c].dtype == np.dtype('O')
or pd.core.common.is_categorical_dtype(X[c])
else X[c].mean() for c in X], index=X.columns)
if self.verbose:
num_nans = sum(X.select_dtypes(include=[np.number]).isnull().sum())
num_total = sum(X.select_dtypes(include=[np.number]).count())
percentage_imputed = num_nans / num_total * 100
print("Percentage Imputed: %.2f%%" % percentage_imputed)
print("Note: Impute will always happen on prediction dataframe, otherwise rows are dropped, and will lead "
"to missing predictions")
# return self for scikit compatibility
return self
评论列表
文章目录