def recursive_feature_elimination_cv(self, step=1, inplace=False):
    """Run recursive feature elimination with cross-validation (RFECV).

    An ``RFECV`` selector is built around ``self.alg`` using
    ``self.cv_folds`` folds and ``self.scoring_metric`` as the score,
    then fitted on the training data restricted to the current
    predictors. At each round the lowest ranked features are removed,
    as specified by ``step``, and the feature subset with the best
    cross-validation score is the one RFECV marks as selected.

    When ``step`` is greater than one, the cross-validation score is
    also plotted (non-blocking) against the number of features kept.

    Parameters
    ----------
    step : int or float, default=1
        If int, then step corresponds to the number of features to
        remove at each iteration.
        If float and within (0.0, 1.0), then step corresponds to the
        percentage (rounded down) of features to remove at each
        iteration.
        If float and greater than one, then the integral part will be
        considered as an integer input.

    inplace : bool, default=False
        If True, the predictors of the class are modified to those
        selected by the RFECV procedure.

    Returns
    -------
    ranks : pandas Series
        A series indexed by every predictor that was passed to the
        selector, with the rank assigned by RFECV as values
        (scikit-learn assigns rank 1 to the selected features).
    """
    rfecv = RFECV(
        self.alg,
        step=step,
        cv=self.cv_folds,
        scoring=self.scoring_metric,
        n_jobs=-1,
    )
    rfecv.fit(
        self.datablock.train[self.predictors],
        self.datablock.train[self.datablock.target],
    )

    if step > 1:
        # Newer scikit-learn exposes the per-subset mean score through
        # ``cv_results_``; ``grid_scores_`` only exists on old releases.
        cv_results = getattr(rfecv, "cv_results_", None)
        if cv_results is not None:
            cv_scores = cv_results["mean_test_score"]
        else:
            cv_scores = rfecv.grid_scores_

        # A float step above one is truncated, so mirror that here;
        # a float would otherwise break the arithmetic on counts.
        int_step = int(step)
        n_total = len(self.predictors)

        # Scores are ordered from the smallest subset to the full set.
        # The last elimination round may remove fewer than ``int_step``
        # features so that it stops at one feature (no
        # ``min_features_to_select`` is passed above), hence the clamp.
        feature_counts = [
            max(n_total - k * int_step, 1)
            for k in reversed(range(len(cv_scores)))
        ]

        plt.xlabel("Number of features selected")
        plt.ylabel("Cross validation score")
        plt.plot(feature_counts, cv_scores)
        plt.show(block=False)

    ranks = pd.Series(rfecv.ranking_, index=self.predictors)

    if inplace:
        # ``support_`` is the boolean mask of the features RFECV kept.
        selected = ranks.loc[rfecv.support_]
        self.set_predictors(selected.index.tolist())

    return ranks
# Page residue from the source website ("Comment list" / "Table of contents"); not part of the code.