def get_feature_selection_model_from_name(type_of_estimator, model_name):
    model_map = {
        'classifier': {
            'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        },
        'regressor': {
            'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        }
    }
    return model_map[type_of_estimator][model_name]
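A minimal usage sketch (X_train and y_train are hypothetical placeholders; the caller fits the returned selector itself, and the 'KeepAll' sentinel means no selection is applied):

selector = get_feature_selection_model_from_name('classifier', 'SelectFromModel')
if selector != 'KeepAll':
    X_reduced = selector.fit_transform(X_train, y_train)  # placeholder data
else:
    X_reduced = X_train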
Example source code for the Python class SelectFromModel()
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    model_map = {
        'classifier': {
            'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'RandomizedSparse': RandomizedLogisticRegression(),
            'KeepAll': 'KeepAll'
        },
        'regressor': {
            'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'RandomizedSparse': RandomizedLasso(),
            'KeepAll': 'KeepAll'
        }
    }
    return model_map[type_of_estimator][model_name]
Source file: extra_trees_preproc_for_regression.py (project: AutoML-Challenge, author: postech-mlg-exbrain)
def fit(self, X, Y):
    from sklearn.ensemble import ExtraTreesRegressor
    from sklearn.feature_selection import SelectFromModel
    num_features = X.shape[1]
    max_features = int(
        float(self.max_features) * (np.log(num_features) + 1))
    # Use at most half of the features
    max_features = max(1, min(int(X.shape[1] / 2), max_features))
    preprocessor = ExtraTreesRegressor(
        n_estimators=self.n_estimators, criterion=self.criterion,
        max_depth=self.max_depth, min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf, bootstrap=self.bootstrap,
        max_features=max_features, max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score, n_jobs=self.n_jobs, verbose=self.verbose,
        random_state=self.random_state)
    preprocessor.fit(X, Y)
    self.preprocessor = SelectFromModel(preprocessor, prefit=True)
    return self
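The class presumably pairs this fit with a transform method along the usual auto-sklearn lines; a sketch, not taken from the source:

def transform(self, X):
    # Assumes fit() has already populated self.preprocessor.
    if self.preprocessor is None:
        raise NotImplementedError()
    return self.preprocessor.transform(X)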
Source file: extra_trees_preproc_for_classification.py (project: AutoML-Challenge, author: postech-mlg-exbrain)
def fit(self, X, Y, sample_weight=None):
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel
    num_features = X.shape[1]
    max_features = int(
        float(self.max_features) * (np.log(num_features) + 1))
    # Use at most half of the features
    max_features = max(1, min(int(X.shape[1] / 2), max_features))
    preprocessor = ExtraTreesClassifier(
        n_estimators=self.n_estimators, criterion=self.criterion,
        max_depth=self.max_depth, min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf, bootstrap=self.bootstrap,
        max_features=max_features, max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score, n_jobs=self.n_jobs, verbose=self.verbose,
        random_state=self.random_state, class_weight=self.class_weight
    )
    preprocessor.fit(X, Y, sample_weight=sample_weight)
    self.preprocessor = SelectFromModel(preprocessor, prefit=True)
    return self
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data.extend(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    for i in range(0, len(name)):
        if outcome[i]:
            print(name[i])
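A toy invocation, assuming both samples are plain lists of numeric feature vectors (the names and values here are hypothetical):

anomaly = [[5.0, 0.1], [4.8, 0.2]]
normal = [[1.0, 0.9], [1.1, 1.0]]
analyseReasonWithTreeBaesd(anomaly, normal, ['load', 'latency'])  # prints the informative feature names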
Source file: onlinedetectWithlittleData.py (project: onlineDetectForHadoop, author: DawnsonLi)
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = pd.concat([anamolySample, normalSample])  # DataFrame.append is deprecated
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    for i in range(0, len(name)):
        if outcome[i]:
            print(name[i])
Source file: birchForChangeWindowSize.py (project: onlineDetectForHadoop, author: DawnsonLi)
def analyseReasonWithTreeBaesd(anamolySample, normalSample):
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = pd.concat([anamolySample, normalSample])  # DataFrame.append is deprecated
    for i in range(0, len(normalSample)):
        target.append(0)
    name = []
    for i in data.columns:
        name.append(i)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    for i in range(0, len(name)):
        if outcome[i]:
            print(name[i])
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data.extend(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    warnstr = ""
    for i in range(0, len(name)):
        if outcome[i]:
            warnstr += name[i]
            warnstr += " ; "
    return warnstr
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = pd.concat([anamolySample, normalSample])
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    warnstr = ""
    for i in range(0, len(name)):
        if outcome[i]:
            warnstr += name[i]
            warnstr += " ; "
    return warnstr
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = pd.concat([anamolySample, normalSample])
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    warnstr = ""
    for i in range(0, len(name)):
        if outcome[i]:
            warnstr += name[i]
            warnstr += " ; "
    print(warnstr)
    return warnstr
def __init__(self, type_of_estimator, column_descriptions, feature_selection_model='SelectFromModel'):
    self.column_descriptions = column_descriptions
    self.type_of_estimator = type_of_estimator
    self.feature_selection_model = feature_selection_model
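A fit method to go with this constructor would presumably look up the selector by name; a sketch only, where everything beyond the attributes above is an assumption:

def fit(self, X, y=None):
    # Hypothetical: reuses the lookup helper defined earlier on this page.
    self.selector = get_feature_selection_model_from_name(
        self.type_of_estimator, self.feature_selection_model)
    if self.selector != 'KeepAll':
        self.selector.fit(X, y)
    return self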
def test():
    # Construct toy data
    np.random.seed(13)
    X = pd.DataFrame(np.random.randn(20, 10))
    X.columns = ['x%d' % i for i in range(10)]
    y = pd.Series(np.random.choice([0, 1], 20))
    # Single selectors: wrap scikit-learn selectors in SklearnSelector
    clf_sklearn = feature_selection.SelectKBest(feature_selection.f_classif, k=4)
    clf = SklearnSelector(estimator=clf_sklearn)
    clf.fit(X, y)
    clf.transform(X)
    print(clf.feature_selected)
    clf_sklearn = SelectFromModel(LogisticRegression())
    clf = SklearnSelector(estimator=clf_sklearn)
    clf.fit(X, y)
    clf.transform(X)
    print(clf.feature_selected)
    # Voting selector: combine several selectors by majority vote
    clf_selectkbest = feature_selection.SelectKBest(feature_selection.f_classif, k=4)
    clf_selectfrommodel = SelectFromModel(LogisticRegression())
    clf_baseselector = SklearnSelector(clf_selectkbest)
    clf = VotingSelector(selectors=[('clf_selectkbest', clf_selectkbest),
                                    ('clf_selectfrommodel', clf_selectfrommodel),
                                    ('clf_baseselector', clf_baseselector)], threshold=0.5)
    clf.fit(X, y)
    clf.transform(X)
    print(clf.feature_selected)
    print(clf.df_voting)
    print(clf.score)
Source file: liblinear_svc_preprocessor.py (project: AutoML-Challenge, author: postech-mlg-exbrain)
def fit(self, X, Y):
    import sklearn.svm
    from sklearn.feature_selection import SelectFromModel
    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = self.dual == 'True'
    self.fit_intercept = self.fit_intercept == 'True'
    self.intercept_scaling = float(self.intercept_scaling)
    if self.class_weight == "None":
        self.class_weight = None
    preprocessor = sklearn.svm.LinearSVC(penalty=self.penalty,
                                         loss=self.loss,
                                         dual=self.dual,
                                         tol=self.tol,
                                         C=self.C,
                                         class_weight=self.class_weight,
                                         fit_intercept=self.fit_intercept,
                                         intercept_scaling=self.intercept_scaling,
                                         multi_class=self.multi_class,
                                         random_state=self.random_state)
    preprocessor.fit(X, Y)
    self.preprocessor = SelectFromModel(preprocessor, prefit=True)
    return self
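For comparison, the same l1-penalised selection can be written without the prefit pattern by letting SelectFromModel clone and fit the estimator inside a Pipeline; a sketch under standard scikit-learn semantics, not the project's code:

from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel

pipe = Pipeline([
    ('select', SelectFromModel(LinearSVC(penalty='l1', dual=False))),  # l1 zeroes out weak coefficients
    ('clf', LogisticRegression()),
])
# pipe.fit(X, y) fits the LinearSVC, keeps the features whose coefficients
# survive the threshold, then trains the classifier on the reduced matrix.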
def lgb_feature_selection(fe_name, matrix_x_temp, label_y, th):
    # SelectFromModel
    clf = LGBMClassifier(n_estimators=400)
    clf.fit(matrix_x_temp, label_y)
    sfm = SelectFromModel(clf, prefit=True, threshold=th)
    matrix_x = sfm.transform(matrix_x_temp)
    # Record each feature's importance score
    feature_score_dict = {}
    for fn, s in zip(fe_name, clf.feature_importances_):
        feature_score_dict[fn] = s
    m = 0
    for k in feature_score_dict:
        if feature_score_dict[k] == 0.0:
            m += 1
    print('number of non-zero features: ' + str(len(feature_score_dict) - m))
    # Sort the features by importance, descending
    feature_score_dict_sorted = sorted(feature_score_dict.items(),
                                       key=lambda d: d[1], reverse=True)
    print('feature_importance:')
    for ii in range(len(feature_score_dict_sorted)):
        print(feature_score_dict_sorted[ii][0], feature_score_dict_sorted[ii][1])
    print('\n')
    f = open('../eda/lgb_feature_importance.txt', 'w')
    f.write(str(th))
    f.write('\nRank\tFeature Name\tFeature Importance\n')
    for i in range(len(feature_score_dict_sorted)):
        f.write(str(i) + '\t' + str(feature_score_dict_sorted[i][0]) + '\t' + str(feature_score_dict_sorted[i][1]) + '\n')
    f.close()
    # Names of the features that were kept
    how_long = matrix_x.shape[1]  # number of columns kept by SelectFromModel
    feature_used_dict_temp = feature_score_dict_sorted[:how_long]
    feature_used_name = []
    for ii in range(len(feature_used_dict_temp)):
        feature_used_name.append(feature_used_dict_temp[ii][0])
    print('features chosen:')
    for ii in range(len(feature_used_name)):
        print(feature_used_name[ii])
    print('\n')
    f = open('../eda/lgb_feature_chose.txt', 'w')
    f.write('Feature Chose Name :\n')
    for i in range(len(feature_used_name)):
        f.write(str(feature_used_name[i]) + '\n')
    f.close()
    # Record the features that were not selected
    feature_not_used_name = []
    for i in range(len(fe_name)):
        if fe_name[i] not in feature_used_name:
            feature_not_used_name.append(fe_name[i])
    return matrix_x, feature_not_used_name[:], len(feature_used_name)
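A hypothetical call (the argument names and the threshold are placeholders, not from the source):

matrix_x, not_used, n_used = lgb_feature_selection(
    fe_name=feature_names,   # list of column names
    matrix_x_temp=X_train,   # 2-D feature matrix
    label_y=y_train,
    th='mean')               # passed straight through to SelectFromModel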
def fit():
    X, y = generate()
    dX = dd.from_pandas(X, npartitions=10)
    y = dd.from_pandas(y, npartitions=10)
    # Preprocessing only; the SGDRegressor is trained incrementally below,
    # so it cannot sit at the end of a transform pipeline.
    pre_pipe = make_pipeline(
        CategoricalEncoder(),
        DummyEncoder(),
        Imputer(),
    )
    clf = SGDRegressor()
    X_ = pre_pipe.fit_transform(dX)
    # Several passes of partial_fit over each dask partition.
    for i in range(X_.npartitions):
        for j in range(5):
            print(i, j)
            X_sub = X_.get_partition(i).compute()
            y_sub = y.get_partition(i).compute()
            clf.partial_fit(X_sub, y_sub)
    sfm = SelectFromModel(clf, prefit=True)
    pipe = make_pipeline(sfm, GradientBoostingRegressor())
    return pipe, clf, sfm
def featuresByInformationGain(features, labels):
    treeCL = tree.DecisionTreeClassifier(criterion="entropy")
    treeCL = treeCL.fit(features, labels)
    transformed_features = SelectFromModel(treeCL, prefit=True).transform(features)
    return transformed_features
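For example, on the iris data (a sketch; tree and SelectFromModel imported as in the snippet):

from sklearn.datasets import load_iris

iris = load_iris()
X_sel = featuresByInformationGain(iris.data, iris.target)
print(X_sel.shape)  # fewer columns than iris.data when some importances fall below the mean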
def plot_feature_importances(columns, X_train, y_train):
    feat_labels = columns[1:]
    forest = RandomForestClassifier(n_estimators=10000, random_state=0)
    forest.fit(X_train, y_train)
    importances = forest.feature_importances_
    indices = np.argsort(importances)[::-1]
    for f in range(X_train.shape[1]):
        print("%2d) %-*s %f" % (
            f + 1,
            30,
            feat_labels[indices[f]],
            importances[indices[f]],
        ))
    print()
    plt.title('Feature Importances')
    plt.bar(
        range(X_train.shape[1]),
        importances[indices],
        color='lightblue',
        align='center',
    )
    plt.xticks(range(X_train.shape[1]), feat_labels[indices], rotation=90)
    plt.xlim([-1, X_train.shape[1]])
    plt.show()
    feature_selector = SelectFromModel(forest, threshold=0.15, prefit=True)
    X_selected = feature_selector.transform(X_train)
    print(X_selected.shape)
def tree_based_selection(self, data_set, data_target, feature_names):
    """
    :param data_set:
    :return:
    """
    clf = ExtraTreesClassifier()
    clf = clf.fit(data_set, data_target)
    print(clf.feature_importances_)
    model = SelectFromModel(clf, prefit=True)
    feature_set = model.transform(data_set)
    fea_index = []
    for A_col in np.arange(data_set.shape[1]):
        for B_col in np.arange(feature_set.shape[1]):
            if (data_set[:, A_col] == feature_set[:, B_col]).all():
                fea_index.append(A_col)
    check = {}
    for i in fea_index:
        check[feature_names[i]] = data_set[0][i]
    print(check)
    return feature_set, fea_index
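The column-by-column equality scan above is quadratic and can mismatch duplicated columns; the selector's own mask gives the same indices directly (a sketch):

fea_index = model.get_support(indices=True).tolist()
# equivalently: list(np.flatnonzero(model.get_support()))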
def test_SelectFromModel():
    '''
    test the method of SelectFromModel
    :return: None
    '''
    digits = load_digits()
    X = digits.data
    y = digits.target
    estimator = LinearSVC(penalty='l1', dual=False)
    selector = SelectFromModel(estimator=estimator, threshold='mean')
    selector.fit(X, y)
    selector.transform(X)
    print("Threshold %s" % selector.threshold_)
    print("Support is %s" % selector.get_support(indices=True))
def test_invalid_input():
    clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=None)
    for threshold in ["gobbledigook", ".5 * gobbledigook"]:
        model = SelectFromModel(clf, threshold=threshold)
        model.fit(data, y)
        assert_raises(ValueError, model.transform, data)
def test_input_estimator_unchanged():
    """
    Test that SelectFromModel fits on a clone of the estimator.
    """
    est = RandomForestClassifier()
    transformer = SelectFromModel(estimator=est)
    transformer.fit(data, y)
    assert_true(transformer.estimator is est)
def test_feature_importances():
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)
    est = RandomForestClassifier(n_estimators=50, random_state=0)
    for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
        transformer = SelectFromModel(estimator=est, threshold=threshold)
        transformer.fit(X, y)
        assert_true(hasattr(transformer.estimator_, 'feature_importances_'))
        X_new = transformer.transform(X)
        assert_less(X_new.shape[1], X.shape[1])
        importances = transformer.estimator_.feature_importances_
        feature_mask = np.abs(importances) > func(importances)
        assert_array_almost_equal(X_new, X[:, feature_mask])
    # Check with sample weights
    sample_weight = np.ones(y.shape)
    sample_weight[y == 1] *= 100
    est = RandomForestClassifier(n_estimators=50, random_state=0)
    transformer = SelectFromModel(estimator=est)
    transformer.fit(X, y, sample_weight=sample_weight)
    importances = transformer.estimator_.feature_importances_
    transformer.fit(X, y, sample_weight=3 * sample_weight)
    importances_bis = transformer.estimator_.feature_importances_
    assert_almost_equal(importances, importances_bis)
    # For the Lasso and related models, the threshold defaults to 1e-5
    transformer = SelectFromModel(estimator=Lasso(alpha=0.1))
    transformer.fit(X, y)
    X_new = transformer.transform(X)
    mask = np.abs(transformer.estimator_.coef_) > 1e-5
    assert_array_equal(X_new, X[:, mask])
def test_partial_fit():
    est = PassiveAggressiveClassifier(random_state=0, shuffle=False)
    transformer = SelectFromModel(estimator=est)
    transformer.partial_fit(data, y,
                            classes=np.unique(y))
    old_model = transformer.estimator_
    transformer.partial_fit(data, y,
                            classes=np.unique(y))
    new_model = transformer.estimator_
    assert_true(old_model is new_model)
    X_transform = transformer.transform(data)
    transformer.fit(np.vstack((data, data)), np.concatenate((y, y)))
    assert_array_equal(X_transform, transformer.transform(data))
def test_warm_start():
    est = PassiveAggressiveClassifier(warm_start=True, random_state=0)
    transformer = SelectFromModel(estimator=est)
    transformer.fit(data, y)
    old_model = transformer.estimator_
    transformer.fit(data, y)
    new_model = transformer.estimator_
    assert_true(old_model is new_model)
def test_threshold_string():
    est = RandomForestClassifier(n_estimators=50, random_state=0)
    model = SelectFromModel(est, threshold="0.5*mean")
    model.fit(data, y)
    X_transform = model.transform(data)
    # Calculate the threshold from the estimator directly.
    est.fit(data, y)
    threshold = 0.5 * np.mean(est.feature_importances_)
    mask = est.feature_importances_ > threshold
    assert_array_equal(X_transform, data[:, mask])
def test_threshold_without_refitting():
    """Test that the threshold can be set without refitting the model."""
    clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=0)
    model = SelectFromModel(clf, threshold=0.1)
    model.fit(data, y)
    X_transform = model.transform(data)
    # Set a higher threshold to filter out more features.
    model.threshold = 1.0
    assert_greater(X_transform.shape[1], model.transform(data).shape[1])