def test_bagged_imputer_classification():
    """Check that BaggedCategoricalImputer fills every missing ``species`` label.

    The iris features plus the class label are placed in one DataFrame,
    the rows are shuffled, and roughly 15% of the labels are replaced by
    NaN. The imputer is then run twice -- once with its default base
    estimator and once with a ``RandomForestClassifier`` -- and in both
    cases the transformed ``species`` column must contain no nulls.
    """
    iris = load_iris()

    # Build the feature frame and append the class label. The label is stored
    # as float from the start so the column can hold NaN without relying on an
    # implicit int -> float upcast during assignment.
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)
    X['species'] = iris.target.astype(float)

    X = shuffle_dataframe(X)

    # Null out ~15% of the labels. A locally seeded generator keeps the mask
    # reproducible, so a failure can be replayed.
    rands = np.random.RandomState(42).rand(X.shape[0])
    mask = rands > 0.85

    # Single-step .loc assignment: the chained form
    # ``X['species'].iloc[mask] = ...`` may write to a temporary copy and
    # leave X untouched. A boolean ndarray is applied positionally, so this
    # works whatever index the shuffle leaves behind.
    X.loc[mask, 'species'] = np.nan

    # Guard against a vacuous pass: the imputer must have something to fill.
    assert X['species'].isnull().sum() > 0, 'expected some nulls before imputing'

    # Same check for the default base estimator and for an explicit one.
    imputers = (
        BaggedCategoricalImputer(cols=['species']),
        BaggedCategoricalImputer(cols=['species'],
                                 base_estimator=RandomForestClassifier()),
    )
    for imputer in imputers:
        y = imputer.fit_transform(X)
        assert y['species'].isnull().sum() == 0, 'expected no null...'
评论列表
文章目录