def test_feature_importances():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
est = RandomForestClassifier(n_estimators=50, random_state=0)
for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
transformer = SelectFromModel(estimator=est, threshold=threshold)
transformer.fit(X, y)
assert_true(hasattr(transformer.estimator_, 'feature_importances_'))
X_new = transformer.transform(X)
assert_less(X_new.shape[1], X.shape[1])
importances = transformer.estimator_.feature_importances_
feature_mask = np.abs(importances) > func(importances)
assert_array_almost_equal(X_new, X[:, feature_mask])
# Check with sample weights
sample_weight = np.ones(y.shape)
sample_weight[y == 1] *= 100
est = RandomForestClassifier(n_estimators=50, random_state=0)
transformer = SelectFromModel(estimator=est)
transformer.fit(X, y, sample_weight=sample_weight)
importances = transformer.estimator_.feature_importances_
transformer.fit(X, y, sample_weight=3 * sample_weight)
importances_bis = transformer.estimator_.feature_importances_
assert_almost_equal(importances, importances_bis)
# For the Lasso and related models, the threshold defaults to 1e-5
transformer = SelectFromModel(estimator=Lasso(alpha=0.1))
transformer.fit(X, y)
X_new = transformer.transform(X)
mask = np.abs(transformer.estimator_.coef_) > 1e-5
assert_array_equal(X_new, X[:, mask])
评论列表
文章目录