Python 类 RandomizedLogisticRegression() 的实例源码 - 面圈网

utils_feature_selection.py 文件源码 | 项目：auto_ml | 作者：doordash | 项目源码 | 文件源码 | 阅读 28 · 收藏 0 · 点赞 0 · 评论 0

def get_feature_selection_model_from_name(type_of_estimator, model_name):
    """Return the feature-selection model registered under ``model_name``.

    Only the requested model is constructed; the other entries are never
    instantiated, which avoids building several unused estimators on
    every call.

    Args:
        type_of_estimator: ``'classifier'`` or ``'regressor'``.
        model_name: One of ``'SelectFromModel'``, ``'RFECV'``,
            ``'GenericUnivariateSelect'``, ``'RandomizedSparse'`` or
            ``'KeepAll'``.

    Returns:
        A newly constructed selector object, or the literal string
        ``'KeepAll'`` when ``model_name`` is ``'KeepAll'``.

    Raises:
        KeyError: If ``type_of_estimator`` or ``model_name`` is not a
            registered key.
    """
    # Each entry is a zero-argument factory so that looking a model up does
    # not pay for constructing all the others.
    model_factories = {
        'classifier': {
            'SelectFromModel': lambda: SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': lambda: RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': lambda: GenericUnivariateSelect(),
            'RandomizedSparse': lambda: RandomizedLogisticRegression(),
            'KeepAll': lambda: 'KeepAll',
        },
        'regressor': {
            'SelectFromModel': lambda: SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': lambda: RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': lambda: GenericUnivariateSelect(),
            'RandomizedSparse': lambda: RandomizedLasso(),
            'KeepAll': lambda: 'KeepAll',
        },
    }

    return model_factories[type_of_estimator][model_name]()

manufacture.py 文件源码 | 项目：forward | 作者：yajun0601 | 项目源码 | 文件源码 | 阅读 23 · 收藏 0 · 点赞 0 · 评论 0

def LogisticRegression(result):
    """Select features with randomized logistic regression, then fit and score
    a plain logistic regression on the selected columns.

    The previous version also shuffled a matrix copy of the input with
    ``random.shuffle`` and sliced it into train/test parts that were never
    used. That code was removed: ``random.shuffle`` swaps rows of a 2-D
    array through views and can duplicate rows, and the result was dead
    anyway. ``as_matrix()`` calls were replaced by ``.values``, which this
    function already used elsewhere.

    Args:
        result: DataFrame-like object with a ``'df'`` column that is used as
            the target; all remaining columns are used as features.
            NOTE: the ``'df'`` column is popped from ``result`` in place,
            exactly as before, so the caller's object loses that column.

    Returns:
        None. The selected column names and the score are printed.
    """
    from sklearn.linear_model import LogisticRegression as LR
    from sklearn.linear_model import RandomizedLogisticRegression as RLR

    # Split the target off; after this ``result`` holds feature columns only.
    df_flag = result.pop('df')

    x = result.values
    y = df_flag.values

    # Fit the randomized model and read its boolean feature mask once.
    rlr = RLR()
    rlr.fit(x, y)
    support = rlr.get_support()
    print(u'??????????????')
    print(u'??????%s' % ','.join(result.columns[support]))

    # Keep only the columns the randomized model selected.
    x = result[result.columns[support]].values

    # Fit the final model and report its score on the same data it was
    # trained on (no hold-out set is used here).
    lr = LR()
    lr.fit(x, y)
    print(u'????')
    print(u'???????%s' % lr.score(x, y))

feat_selection.py 文件源码 | 项目：kaggle_airbnb | 作者：svegapons | 项目源码 | 文件源码 | 阅读 73 · 收藏 0 · 点赞 0 · 评论 0

def log_reg_feat_selection(X_train, y_train, X_valid, y_valid, random_state):
    """
    Feature selection based on the scores given to the features by the
    RandomizedLogisticRegression algorithm.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        X_valid: Unused; kept so the signature stays unchanged.
        y_valid: Unused; kept so the signature stays unchanged.
        random_state: Seed forwarded to RandomizedLogisticRegression.
            Previously this argument was ignored and the seed was always 0.

    Returns:
        The fitted model's ``scores_`` attribute. The same array is also
        written to ``save/feat_sel_log_reg.npy``.
    """

    rlr = RandomizedLogisticRegression(C=[0.001, 0.01, 0.1, 1.],
                                       sample_fraction=0.7,
                                       n_resampling=200, selection_threshold=0.25,
                                       verbose=5, n_jobs=-1,
                                       random_state=random_state)
    rlr.fit(X_train, y_train)
    # NOTE(review): np.save does not create the 'save/' directory; it is
    # presumably expected to exist already — confirm with the caller.
    np.save('save/feat_sel_log_reg.npy', rlr.scores_)

    return rlr.scores_

wrangler.py 文件源码 | 项目：tcsl | 作者：machinelearningnanodegree | 项目源码 | 文件源码 | 阅读 27 · 收藏 0 · 点赞 0 · 评论 0

def stabilty_index(self, clf):
    """Fit ``clf`` and return its per-feature scores keyed by column name.

    ``clf`` is fitted on ``self.features`` / ``self.labels``; each value of
    its ``scores_`` attribute is rounded to four decimals and paired, in
    order, with the names from ``self.features.columns``.
    """
    clf.fit(self.features, self.labels)
    feature_names = self.features.columns.tolist()
    rounded_scores = [round(score, 4) for score in clf.scores_]
    return dict(zip(feature_names, rounded_scores))

wrangler.py 文件源码 | 项目：tcsl | 作者：machinelearningnanodegree | 项目源码 | 文件源码 | 阅读 30 · 收藏 0 · 点赞 0 · 评论 0

def _get_clfs(self):
    """Return a dict mapping short classifier keys to new estimator objects."""
    classifiers = {}
    classifiers["rlrclf"] = RandomizedLogisticRegression()
    classifiers["rfclf"] = RandomForestClassifier(criterion='entropy')
    classifiers["dtrclf"] = DecisionTreeClassifier(criterion='entropy')
    classifiers["lrclf"] = LogisticRegression()
    return classifiers

ngram_featurizer.py 文件源码 | 项目：rasa_nlu | 作者：RasaHQ | 项目源码 | 文件源码 | 阅读 22 · 收藏 0 · 点赞 0 · 评论 0

def _sort_applicable_ngrams(self, list_of_ngrams, examples, labels):
        """Given an intent classification problem and a list of ngrams,

        creates ordered list of most useful ngrams."""

        if list_of_ngrams:
            from sklearn import linear_model, preprocessing
            import numpy as np

            # filter examples where we do not have enough labeled instances for cv
            usable_labels = []
            for label in np.unique(labels):
                lab_sents = np.array(examples)[np.array(labels) == label]
                if len(lab_sents) < self.min_intent_examples_for_ngram_classification:
                    continue
                usable_labels.append(label)

            mask = [label in usable_labels for label in labels]
            if any(mask) and len(usable_labels) >= 2:
                try:
                    examples = np.array(examples)[mask]
                    labels = np.array(labels)[mask]

                    X = np.array(self._ngrams_in_sentences(examples, list_of_ngrams))
                    intent_encoder = preprocessing.LabelEncoder()
                    intent_encoder.fit(labels)
                    y = intent_encoder.transform(labels)

                    clf = linear_model.RandomizedLogisticRegression(C=1)
                    clf.fit(X, y)
                    scores = clf.scores_
                    sort_idx = [i[0] for i in sorted(enumerate(scores), key=lambda x: -1 * x[1])]

                    return np.array(list_of_ngrams)[sort_idx]
                except ValueError as e:
                    if "needs samples of at least 2 classes" in str(e):
                        # we got unlucky during the random sampling :( and selected a slice that only contains one class
                        return []
                    else:
                        raise e
            else:
                # there is no example we can use for the cross validation
                return []
        else:
            return []

ngram_featurizer.py 文件源码 | 项目：Rasa_NLU_Chi | 作者：crownpku | 项目源码 | 文件源码 | 阅读 30 · 收藏 0 · 点赞 0 · 评论 0

def _sort_applicable_ngrams(self, list_of_ngrams, examples, labels):
        """Given an intent classification problem and a list of ngrams,

        creates ordered list of most useful ngrams."""

        if list_of_ngrams:
            from sklearn import linear_model, preprocessing
            import numpy as np

            # filter examples where we do not have enough labeled instances for cv
            usable_labels = []
            for label in np.unique(labels):
                lab_sents = np.array(examples)[np.array(labels) == label]
                if len(lab_sents) < self.min_intent_examples_for_ngram_classification:
                    continue
                usable_labels.append(label)

            mask = [label in usable_labels for label in labels]
            if any(mask) and len(usable_labels) >= 2:
                try:
                    examples = np.array(examples)[mask]
                    labels = np.array(labels)[mask]

                    X = np.array(self._ngrams_in_sentences(examples, list_of_ngrams))
                    intent_encoder = preprocessing.LabelEncoder()
                    intent_encoder.fit(labels)
                    y = intent_encoder.transform(labels)

                    clf = linear_model.RandomizedLogisticRegression(C=1)
                    clf.fit(X, y)
                    scores = clf.scores_
                    sort_idx = [i[0] for i in sorted(enumerate(scores), key=lambda x: -1 * x[1])]

                    return np.array(list_of_ngrams)[sort_idx]
                except ValueError as e:
                    if "needs samples of at least 2 classes" in str(e):
                        # we got unlucky during the random sampling :( and selected a slice that only contains one class
                        return []
                    else:
                        raise e
            else:
                # there is no example we can use for the cross validation
                return []
        else:
            return []