def select_kbest_reg(data_frame, target, k=5):
    """
    Score every feature against the target with a K-best regression selector.

    :param data_frame: pandas DataFrame holding the training data
    :param target: name of the target column in ``data_frame``
    :param k: number of features the selector is asked to keep
    :returns: pandas DataFrame with one row per feature and the columns
        "F Score", "P Value", "Support" (the selector's support mask) and
        "Attribute" (the feature's column name)
    """
    selector = SelectKBest(f_regression, k=k)
    # Everything except the target column is treated as a candidate feature.
    features = data_frame.drop(target, axis=1)
    selector.fit(features, data_frame[target])

    report_columns = (
        ("F Score", selector.scores_),
        ("P Value", selector.pvalues_),
        ("Support", selector.get_support()),
        ("Attribute", features.columns),
    )
    report = pd.DataFrame()
    for label, values in report_columns:
        report[label] = values
    return report
python类f_regression()的实例源码
feat_regress.py 文件源码
项目:Stock-Market-Analysis-and-Prediction
作者: samshara
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
def main():
    """Build a scale -> select-5-best -> linear-SVC step list and run it.

    A synthetic classification dataset (1000 samples, 5 informative and
    4 redundant features) is generated and passed, together with the list
    of steps, to ``cached_run``.  Both ``cached_run`` and ``_random_state``
    are expected to be defined elsewhere in this module.
    """
    from sklearn import svm
    # ``sklearn.datasets.samples_generator`` was deprecated in scikit-learn
    # 0.22 and removed in 0.24; the public import path below works on both
    # old and new releases.
    from sklearn.datasets import make_classification
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn.preprocessing import MinMaxScaler

    X, y = make_classification(
        n_samples=1000,
        n_informative=5,
        n_redundant=4,
        random_state=_random_state,
    )
    # NOTE(review): f_regression is used to score a classification target
    # here; f_classif may have been intended -- confirm before changing.
    anova_filter = SelectKBest(f_regression, k=5)
    scaler = MinMaxScaler()
    clf = svm.SVC(kernel='linear')
    steps = [scaler, anova_filter, clf]
    cached_run(steps, X, y)
def ANOVA(X, y):
    """Run univariate linear-regression tests of the regressors in X against y.

    Thin wrapper that forwards ``X`` and ``y`` to ``f_regression`` (from
    scikit-learn's feature-selection toolbox), a quick linear model for
    testing the effect of many regressors one at a time.

    Returns:
        A 2-tuple ``(F, pvalues)``:
        F (array) = F-values for the regressors
        pvalues (array) = p-values for those F-scores
    """
    f_values, p_values = f_regression(X, y)
    return f_values, p_values
def get_params_for_est(estimator, name):
    """Pick constructor parameters and sample data for auto-testing an estimator.

    ``estimator`` is a class (its ``__mro__`` and ``__init__`` are inspected)
    and ``name`` is its name as a string.

    Returns a 4-tuple ``(X, y, params, kw)`` where ``params`` are the keyword
    defaults reported by ``get_args_kwargs_defaults`` with any nested-estimator
    and ``score_func`` entries filled in, ``kw`` holds the flags describing the
    estimator, and ``X, y`` come from ``make_X_y(**kw)``.
    """
    mro = estimator.__mro__
    is_classifier = ClassifierMixin in mro
    is_cluster = ClusterMixin in mro
    is_ensemble = BaseEnsemble in mro
    uses_counts = any(token in name for token in USES_COUNTS)
    # Computed but currently unused in this function; retained as-is.
    as_1d = name in REQUIRES_1D

    args, params, _ = get_args_kwargs_defaults(estimator.__init__)

    # Constructor arguments that expect a nested estimator (or a list of them).
    nested_names = {'estimator', 'base_estimator', 'estimators'}
    est_keys = (set(params) | set(args)) & nested_names

    score_func = feat.f_classif if is_classifier else feat.f_regression

    for key in est_keys:
        if name == 'SelectFromModel':
            template = sklearn.linear_model.LassoCV()
        elif is_classifier:
            template = sklearn.tree.DecisionTreeClassifier()
        else:
            template = sklearn.tree.DecisionTreeRegressor()

        if key == 'estimators':
            # A list of ten (label, estimator) pairs, each a clone of the
            # template, labelled '0' .. '9'.
            params[key] = [(str(i), clone(template)) for i in range(10)]
        else:
            params[key] = template

    kw = {
        'is_classifier': is_classifier,
        'is_cluster': is_cluster,
        'is_ensemble': is_ensemble,
        'uses_counts': uses_counts,
    }
    if 'score_func' in params:
        params['score_func'] = score_func

    X, y = make_X_y(**kw)
    return X, y, params, kw