import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFE


def greedy_elim(df):
    # do feature selection using recursive feature elimination (RFE)
    X = df[[x for x in df.columns if x != 'SalePrice']]
    y = df['SalePrice']
    # model = RandomForestRegressor(n_estimators=50)
    model = GradientBoostingRegressor(n_estimators=50, learning_rate=0.05)
    # 150 features seems to work best at the moment; why that is remains
    # unclear (see the RFECV sketch below)
    feat_selector = RFE(estimator=model, step=1, n_features_to_select=150)
    # fit the selector and reduce X to the selected features
    feat_selector.fit_transform(X.values, y.values)
    # boolean mask of the selected features
    features_bool = np.array(feat_selector.support_)
    features = np.array(X.columns)
    result = features[features_bool]
    # ranking of the selected features (1 = selected), kept for inspection
    features_rank = feat_selector.ranking_
    rank = features_rank[features_bool]
    return result
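
The comment above notes that 150 features happens to work best without an obvious reason. One way to avoid hard-coding that number is RFECV, which picks the feature count by cross-validation; the sketch below is a minimal variant under the same df/SalePrice layout, not part of the original project.

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFECV


def greedy_elim_cv(df):
    # sketch: let cross-validation choose the feature count instead of fixing 150
    X = df[[x for x in df.columns if x != 'SalePrice']]
    y = df['SalePrice']
    model = GradientBoostingRegressor(n_estimators=50, learning_rate=0.05)
    selector = RFECV(estimator=model, step=1, cv=5, scoring='neg_mean_squared_error')
    selector.fit(X.values, y.values)
    return np.array(X.columns)[selector.support_]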
Python RFE class: example source code
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC


def test_compare_with_no_feature_selection():
    '''
    compare the result before the selection and after
    :return: None
    '''
    iris = load_iris()
    X, y = iris.data, iris.target
    estimator = LinearSVC()
    selector = RFE(estimator=estimator, n_features_to_select=2)
    # note: fitting the selector on the full data set before splitting
    # leaks test information; see the pipeline sketch below
    X_t = selector.fit_transform(X, y)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0, stratify=y)
    X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(
        X_t, y, test_size=0.25, random_state=0, stratify=y)
    clf = LinearSVC()
    clf_t = LinearSVC()
    clf.fit(X_train, y_train)
    clf_t.fit(X_train_t, y_train_t)
    print("Original DataSet: test score=%s" % (clf.score(X_test, y_test)))
    print("Selected DataSet: test score=%s" % (clf_t.score(X_test_t, y_test_t)))
def recursive_index(self, clf):
    # rank all features, i.e. continue the elimination until only one is left
    rfe = RFE(clf, n_features_to_select=1)
    rfe.fit(self.features, self.labels)
    # map each feature name to its RFE rank (1 = best)
    rfedict = dict(zip(self.features.columns.tolist(), rfe.ranking_))
    return rfedict
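
Hypothetical follow-up, assuming an object obj that exposes features and labels as the method expects: sort the returned dict to list features from strongest (rank 1) to weakest.

rfedict = obj.recursive_index(LinearSVC())  # obj is illustrative, not from the source
for name, rank in sorted(rfedict.items(), key=lambda kv: kv[1]):
    print(name, rank)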
feature_selection.py (project: Default-Credit-Card-Prediction, author: AlexPnt)
def rfe_selection(X, y, n_features):
    """
    Performs Recursive Feature Elimination and selects the top-ranked features.

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    n_features -- number of best-ranked features to keep
    """
    if verbose:
        print('\nPerforming Feature Selection based on the Recursive Feature Elimination method ...')
    clf = RandomForestClassifierWithCoef(n_estimators=10, n_jobs=-1)
    fs = RFE(clf, n_features_to_select=n_features, step=1)
    fs = fs.fit(X, y)
    ranks = fs.ranking_
    # indexes (in the original column order) of the features ranked 1, i.e. selected
    feature_indexes = [i for i in range(len(ranks)) if ranks[i] == 1]
    # return the selected feature columns and their original indexes
    return X[:, feature_indexes[:n_features]], feature_indexes[:n_features]
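
RandomForestClassifierWithCoef is defined elsewhere in that project. A plausible minimal stand-in (an assumption, not the project's exact code) is the common wrapper that mirrors feature_importances_ into coef_, which older RFE versions required from the estimator:

from sklearn.ensemble import RandomForestClassifier


class RandomForestClassifierWithCoef(RandomForestClassifier):
    # sketch: older RFE looked for coef_, which forests lack, so the wrapper
    # copies feature_importances_ into coef_ after fitting
    def fit(self, *args, **kwargs):
        super().fit(*args, **kwargs)
        self.coef_ = self.feature_importances_
        return self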
def __init__(self, conf):
    SemiSupervisedFeatureSelection.__init__(self, conf)
    self.projection = RFE(estimator=conf.model,
                          n_features_to_select=conf.num_components,
                          step=conf.step)
import pickle
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.utils import shuffle

import ml_feature_name  # project-local module


def featureRank(useFeature, trueSet, falseSet):
    # load data and split
    X_true = []
    for dn in trueSet:
        with open("./learn/data/" + useFeature + "_" + dn + ".pkl", "rb") as fin:
            X_true.append(pickle.load(fin))
    X_true = np.vstack(X_true)
    print(X_true.shape)
    X_false = []
    for dn in falseSet:
        with open("./learn/data/" + useFeature + "_" + dn + ".pkl", "rb") as fin:
            X_false.append(pickle.load(fin))
    X_false = np.vstack(X_false)
    print(X_false.shape)
    test_size = 0.3
    X_true_train, X_true_test = train_test_split(X_true, test_size=test_size)
    # sample the negative class to match the positive class split sizes
    X_false_train, X_false_test = train_test_split(
        X_false, train_size=len(X_true_train), test_size=len(X_true_test))
    X = np.vstack([X_true_train, X_false_train])
    X_ = np.vstack([X_true_test, X_false_test])
    Y = [1] * len(X_true_train) + [0] * len(X_false_train)
    Y_ = [1] * len(X_true_test) + [0] * len(X_false_test)
    X, Y = shuffle(X, Y)
    X_, Y_ = shuffle(X_, Y_)
    featNames = ml_feature_name.getFeatureName(useFeature)
    clf = LinearSVC(C=0.1)
    # rank every feature by eliminating down to a single one
    rfe = RFE(estimator=clf, n_features_to_select=1, step=1)
    rfe.fit(X, Y)
    ranks = rfe.ranking_
    if useFeature == "rp":
        with open("./learn/feature/rp_feature_rank.txt", "w") as fout:
            for i, r in enumerate(ranks):
                fout.write("{0} {1}\n".format(i, r))
    rankFeat = sorted(zip(ranks, featNames))
    for rf in rankFeat:
        # for tf-idf n-gram features, only print the diatonic ones
        if useFeature in ["tfidf_1gram", "tfidf_2gram", "tfidf_3gram", "tfidf_4gram"]:
            if ml_feature_name.isDiatonic(rf[1]):
                print(rf)
        else:
            print(rf)
def test_RFE():
    '''
    test the RFE method, aiming for 2 selected features
    :return: None
    '''
    iris = load_iris()
    X = iris.data
    y = iris.target
    estimator = LinearSVC()
    selector = RFE(estimator=estimator, n_features_to_select=2)
    selector.fit(X, y)
    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)
from sklearn import datasets
from sklearn.linear_model import LogisticRegression


def sk_feature_ref():
    # load the iris dataset
    dataset = datasets.load_iris()
    # create a base classifier used to evaluate a subset of attributes
    model_lr = LogisticRegression()
    # create the RFE model and select 3 attributes
    rfe = RFE(model_lr, n_features_to_select=3)
    rfe = rfe.fit(dataset.data, dataset.target)
    # summarize the selection of the attributes
    print(rfe.support_)
    # [False True True True]
    print(rfe.ranking_)
    # [2 1 1 1]
    print(sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), dataset.feature_names)))
    # [(1.0, 'petal length (cm)'), (1.0, 'petal width (cm)'), (1.0, 'sepal width (cm)'), (2.0, 'sepal length (cm)')]
def sk_feature_ref_v2():
    X, Y = get_dummy_data()
    names = ['f1', 'f2', 'f3']
    model_lr = LogisticRegression()
    rfe = RFE(model_lr, n_features_to_select=2)
    rfe = rfe.fit(X, Y)
    print(rfe.support_)
    print(rfe.ranking_)
    print(sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), names)))
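
get_dummy_data is project-local and not shown here; a minimal stand-in (purely illustrative, just enough to run the example) could be:

import numpy as np


def get_dummy_data(n=100, seed=0):
    # three features; the target depends only on f1 and f2,
    # so RFE should rank f3 last
    rng = np.random.RandomState(seed)
    X = rng.rand(n, 3)
    Y = (X[:, 0] + X[:, 1] > 1).astype(int)
    return X, Y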
def recursive_feature_elimination(self, nfeat=None, step=1, inplace=False):
    """A method to implement recursive feature elimination on the model.
    Note that CV is not performed in this function. The method will
    continue to eliminate some features (specified by the step parameter)
    at each iteration until the specified number of features is reached.

    Parameters
    ----------
    nfeat : int or None, default=None
        The number of top features to select. If None, half of the
        features are selected.
    step : int or float, default=1
        If int, then step corresponds to the number of features to remove
        at each iteration.
        If float and within (0.0, 1.0), then step corresponds to the
        percentage (rounded down) of features to remove at each
        iteration.
        If float and greater than one, its integer part is used.
    inplace : bool, default=False
        If True, the predictors of the class are modified to those
        selected by the RFE procedure.

    Returns
    -------
    selected : pandas.Series
        The selected features as index and their rank in selection
        as values.
    """
    rfe = RFE(self.alg, n_features_to_select=nfeat, step=step)
    rfe.fit(
        self.datablock.train[self.predictors],
        self.datablock.train[self.datablock.target]
    )
    ranks = pd.Series(rfe.ranking_, index=self.predictors)
    selected = ranks.loc[rfe.support_]
    if inplace:
        self.set_predictors(selected.index.tolist())
    return selected
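
Hypothetical usage, assuming a fitted model wrapper m built on this class (the name and data are illustrative, not from the source): keep the top 20 predictors, dropping 10% of the remaining features per iteration.

selected = m.recursive_feature_elimination(nfeat=20, step=0.1, inplace=True)
print(selected.sort_values().head())  # rank 1 = kept by RFE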