python类GridSearchCV()的实例源码

model_pipeline.py 文件源码 项目:texta 作者: texta-tk 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def train_model_with_cv(model, params, X, y):
    """Grid-search ``model`` on a training split and score the winner on a held-out split.

    20% of the data is set aside with ``train_test_split``; the remaining 80%
    drives a 5-fold ``GridSearchCV`` over ``params``.

    :param model: estimator to tune.
    :param params: parameter grid handed to ``GridSearchCV``.
    :param X: feature data.
    :param y: target labels.
    :return: ``(best_estimator, statistics)`` where ``statistics`` holds the
        micro-averaged F1 score, the confusion matrix, and precision/recall
        computed with the scoring functions' default averaging.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

    # Parameter selection only ever sees the training portion.
    search = GridSearchCV(model, params, n_jobs=1, cv=5).fit(X_train, y_train)
    best_model = search.best_estimator_

    # Final evaluation uses the untouched test portion.
    predicted = best_model.predict(X_test)

    statistics = {
        'f1_score': f1_score(y_test, predicted, average='micro'),
        'confusion_matrix': confusion_matrix(y_test, predicted),
        'precision': precision_score(y_test, predicted),
        'recall': recall_score(y_test, predicted),
    }
    return best_model, statistics
_pipeline.py 文件源码 项目:palladio 作者: slipguru 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fit(self, X, y=None):
        """Normalize the inputs when configured, then grid-search the learner.

        The ``GridSearchCV`` object is stored on ``self.gs_`` and fitted in
        place; the method itself returns nothing.

        :param X: training data; passed through ``self.normalize_data`` when
            ``self.data_normalizer`` is set.
        :param y: optional targets; passed through ``self.normalize_label``
            when ``self.label_normalizer`` is set.
        """
        if self.data_normalizer is not None:
            X = self.normalize_data(X)
        if self.label_normalizer is not None:
            y = self.normalize_label(y)

        # Resolve the estimator: wrap it as a classifier when forced,
        # instantiate it when the learner is callable, else use it as given.
        if self.force_classifier:
            estimator = make_classifier(self.learner, params=self.learner_options)
        elif callable(self.learner):
            estimator = self.learner(**self.learner_options)
        else:
            estimator = self.learner

        search = GridSearchCV(estimator=estimator, **self.cv_options)
        self.gs_ = search
        search.fit(X, y)
_model.py 文件源码 项目:probablyPOTUS 作者: jjardel 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def train(self, train_size=0.8, k_folds=5):
        """Build the feature pipeline and tune it with a cross-validated grid search.

        The fitted search is stored on ``self.gs_`` and its best estimator on
        ``self.model_``.

        :param train_size: forwarded to ``self._get_train_test_split``.
        :param k_folds: number of CV folds used by the grid search.
        """
        # Load/pre-process the data, then compute the train/test indices.
        self._get_data()
        self._get_train_test_split(train_size=train_size)

        # Text branch: pull the text columns and vectorize them with TF-IDF.
        text_branch = Pipeline([
            ('extract_text', DFColumnExtractor(TEXT_FEATURES)),
            ('vect', TfidfVectorizer(tokenizer=twitter_tokenizer)),
        ])

        # Numeric branch: pull the non-text columns and min-max scale them.
        numeric_branch = Pipeline([
            ('extract_nums', DFColumnExtractor(NON_TEXT_FEATURES)),
            ('scaler', MinMaxScaler()),
        ])

        # Both branches are joined side by side and feed the estimator.
        combined_features = FeatureUnion([
            ('text_processing', text_branch),
            ('num_processing', numeric_branch),
        ])
        full_pipeline = Pipeline([
            ('features', combined_features),
            ('clf', self._estimator),
        ])

        self.logger.info('Fitting model hyperparameters with {0}-fold CV'.format(k_folds))
        search = GridSearchCV(full_pipeline, self.params, n_jobs=-1, cv=k_folds)

        # Only the rows selected for training take part in the search.
        train_rows = self.data.iloc[self.train_inds_, :]
        train_labels = self.data[LABEL].values[self.train_inds_]
        search.fit(train_rows, train_labels)

        self.logger.info('Validation set accuracy is {0}'.format(search.best_score_))

        self.gs_ = search
        self.model_ = search.best_estimator_
grid_search.py 文件源码 项目:skutil 作者: tgsmith61591 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def fit(self, X, y=None, groups=None):
            """Fit the search over every parameter combination.

            ``y`` is converted with ``_as_numpy`` before being delegated to the
            parent class's ``fit``.

            Parameters
            ----------

            X : array-like, shape=(n_samples, n_features)
                Training vectors (n_samples rows, n_features columns).

            y : array-like, shape=(n_samples,) or (n_samples, n_output), optional (default=None)
                Targets for classification or regression; None for
                unsupervised learning.

            groups : array-like, shape=(n_samples,), optional (default=None)
                Group labels used while splitting the data into train/test
                sets.

            Returns
            -------
            Whatever the parent class's ``fit`` returns.
            """
            targets = _as_numpy(y)
            return super(GridSearchCV, self).fit(X, targets, groups)
xg_train.py 文件源码 项目:trend_ml_toolkit_xgboost 作者: raymon-tian 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def tune_xgb_cv(params_untuned,scoring='roc_auc', n_jobs=4, cv=5):
    # global  dtrain_whole
    global  num_boost_round
    global  params_sklearn

    # global x
    # global y
    for param_untuned in params_untuned:
        print '==========  ', param_untuned, '  =============='
        print_params(params_sklearn)
        estimator = xgb.XGBClassifier(**params_sklearn)
        grid_search = GridSearchCV(estimator, param_grid=param_untuned, scoring=scoring, n_jobs=n_jobs, cv=cv, verbose=10)
        grid_search.fit(x, y)
        df0 = pd.DataFrame(grid_search.cv_results_)
        df = pd.DataFrame(grid_search.cv_results_)[['params', 'mean_train_score', 'mean_test_score']]
        # print df0
        print df
        print 'the best_params : ', grid_search.best_params_
        print 'the best_score  : ', grid_search.best_score_
        # print grid_search.cv_results_
        for k,v in grid_search.best_params_.items():
            params_sklearn[k] = v
            if len(params_untuned)==1:
                return v
test.py 文件源码 项目:trend_ml_toolkit_xgboost 作者: raymon-tian 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def tune_xgb_cv(params_untuned,scoring='roc_auc', n_jobs=1, cv=5):
    # global  dtrain_whole
    global  num_boost_round
    global  params_sklearn
    # global x
    # global y
    for param_untuned in params_untuned:
        print '==========  ', param_untuned, '  =============='
        print_params(params_sklearn)
        estimator = xgb.XGBClassifier(**params_sklearn)
        grid_search = GridSearchCV(estimator, param_grid=param_untuned, scoring=scoring, n_jobs=n_jobs, cv=cv, verbose=10)
        grid_search.fit(x, y)
        df0 = pd.DataFrame(grid_search.cv_results_)
        df = pd.DataFrame(grid_search.cv_results_)[['params', 'mean_train_score', 'mean_test_score']]
        # print df0
        print df
        print 'the best_params : ', grid_search.best_params_
        print 'the best_score  : ', grid_search.best_score_
        # print grid_search.cv_results_
        for k,v in grid_search.best_params_.items():
            params_sklearn[k] = v
tools.py 文件源码 项目:trend_ml_toolkit_xgboost 作者: raymon-tian 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def tune_classifier(estimator,params,X_train,Y_train,scoring='roc_auc',n_jobs=3,cv=5):
    results = []
    for k,values in params.items():
        params_single = dict(k=values)
        print '==========  ',params_single,'  =============='
        grid_search = GridSearchCV(estimator,param_grid=params_single,scoring=scoring,n_jobs=n_jobs,cv=cv,verbose=5)
        grid_search.fit(X_train,Y_train)
        df0 = pd.DataFrame(grid_search.cv_results_)
        df = pd.DataFrame(grid_search.cv_results_)[['params','mean_train_score','mean_test_score']]
        # print df0
        print df
        print 'the best_params : ',grid_search.best_params_
        print 'the best_score  : ',grid_search.best_score_
        # print grid_search.cv_results_
        results.append(grid_search.best_params_)
    return results
xg_train_slower.py 文件源码 项目:trend_ml_toolkit_xgboost 作者: raymon-tian 项目源码 文件源码 阅读 15 收藏 0 点赞 0 评论 0
def tune_xgb_cv(params_untuned,params_sklearn,scoring='roc_auc', n_jobs=4, cv=5,verbose=10):

    for param_untuned in params_untuned:
        print '==========  ', param_untuned, '  =============='
        print_params(params_sklearn)
        estimator = xgb.XGBClassifier(**params_sklearn)
        # if(param_untuned.keys()[0] == 'n_estimators'):
        #     cv = 1
        grid_search = GridSearchCV(estimator, param_grid=param_untuned, scoring=scoring, n_jobs=n_jobs, cv=cv, verbose=verbose)
        grid_search.fit(x, y)
        df = pd.DataFrame(grid_search.cv_results_)[['params', 'mean_train_score', 'mean_test_score']]
        print df
        print 'the best_params : ', grid_search.best_params_
        print 'the best_score  : ', grid_search.best_score_
        for k,v in grid_search.best_params_.items():
            params_sklearn[k] = v
    return estimator,params_sklearn
test_optimisation.py 文件源码 项目:uncover-ml 作者: GeoscienceAustralia 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_pipeline(get_models, get_transform, get_kernel):
    """Grid-search a one-step pipeline around each model and sanity-check the train score."""
    name, model_cls = get_models
    transform = get_transform()
    full_kernel = get_kernel() + WhiteKernel()

    pipeline = Pipeline(steps=[(name, model_cls())])

    # Only tune hyper-parameters that the model actually exposes.
    grid = {}
    for attr, candidates in (('n_estimators', [5]), ('kernel', [full_kernel])):
        if hasattr(model_cls(), attr):
            grid[name + '__' + attr] = candidates
    grid[name + '__target_transform'] = [transform]

    estimator = GridSearchCV(pipeline,
                             grid,
                             n_jobs=1,
                             iid=False,
                             pre_dispatch=2,
                             verbose=True,
                             )
    # Fixed seed so the random features/targets are reproducible.
    np.random.seed(10)
    features = 1 + np.random.rand(10, 3)
    targets = 1. + np.random.rand(10)
    estimator.fit(X=features, y=targets)
    assert estimator.cv_results_['mean_train_score'][0] > -15.0
test_optimisation.py 文件源码 项目:uncover-ml 作者: GeoscienceAustralia 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_svr_pipeline(get_transform, get_svr_kernel):
    """Grid-search an SVR pipeline over one kernel/transform pair and sanity-check the train score."""
    transform = get_transform()
    pipeline = Pipeline(steps=[('svr', svr())])
    grid = {
        'svr__kernel': [get_svr_kernel],
        'svr__target_transform': [transform],
    }

    estimator = GridSearchCV(pipeline,
                             grid,
                             n_jobs=1,
                             iid=False,
                             pre_dispatch=2,
                             verbose=True,
                             )
    # Fixed seed so the random features/targets are reproducible.
    np.random.seed(1)
    features = 1 + np.random.rand(10, 5)
    targets = 1. + np.random.rand(10)
    estimator.fit(X=features, y=targets)
    assert estimator.cv_results_['mean_train_score'][0] > -10.0
test_optimisation.py 文件源码 项目:uncover-ml 作者: GeoscienceAustralia 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_krige_pipeline(get_krige_method, get_variogram_model):
    """Grid-search a Krige pipeline over one variogram model and sanity-check the train score."""
    krige_step = ('krige', Krige(method=get_krige_method))
    pipeline = Pipeline(steps=[krige_step])
    grid = {'krige__variogram_model': [get_variogram_model]}

    estimator = GridSearchCV(pipeline,
                             grid,
                             n_jobs=1,
                             iid=False,
                             pre_dispatch=2,
                             verbose=True
                            )
    # Fixed seed so the random coordinates/targets are reproducible.
    np.random.seed(1)
    coords = np.random.randint(0, 400, size=(20, 2)).astype(float)
    targets = 5*np.random.rand(20)
    estimator.fit(X=coords, y=targets)
    assert estimator.cv_results_['mean_train_score'][0] > -1.0
test_pyglmnet.py 文件源码 项目:pyglmnet 作者: glm-tools 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def test_cv():
    """Check that a Gaussian GLM works with cross_val_score and GridSearchCV."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    folds = KFold(X.shape[0], 5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)

    # One score is expected per fold.
    fold_scores = cross_val_score(glm_normal, X, y, cv=folds)
    assert_equal(len(fold_scores), 5)

    # Two separate grids: one over alpha, one over reg_lambda.
    alpha_grid = {'alpha': np.linspace(0.01, 0.99, 2)}
    lambda_grid = {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}
    glmcv = GridSearchCV(glm_normal, [alpha_grid, lambda_grid], cv=folds)
    glmcv.fit(X, y)
Classifier.py 文件源码 项目:SecuML 作者: ANSSI-FR 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def setBestParameters(self):
        """Grid-search the pipeline's hyper-parameters and apply the best ones.

        The search is scored with ``'f1_macro'`` when
        ``self.conf.families_supervision`` is truthy and ``'roc_auc'``
        otherwise; training sample weights are forwarded to the pipeline's
        ``model`` step.

        :return: the ``StratifiedKFold`` splitter used by the search, or
            ``None`` when the configuration has no parameter grid.
        """
        cv = StratifiedKFold(n_splits = self.conf.num_folds)
        param_grid = self.conf.getParamGrid()
        if param_grid is None:
            # Nothing to tune.
            return

        scoring = 'f1_macro' if self.conf.families_supervision else 'roc_auc'
        weights = {'model__sample_weight': self.datasets.sample_weight}
        search = GridSearchCV(self.pipeline, param_grid = param_grid,
                scoring = scoring,
                cv = cv,
                n_jobs = -1,
                fit_params = weights)

        train_set = self.datasets.train_instances
        search.fit(train_set.getFeatures(), self.getSupervision(train_set))

        # Record the winners in the configuration, then push them to the pipeline.
        self.conf.setBestValues(search)
        self.pipeline.set_params(**self.conf.getBestValues())
        return cv
model_select.py 文件源码 项目:tpai_comp 作者: luuuyi 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def xgb_model_select(file_name):  
    train_df = read_from_file(file_name)
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby')
    train_np = selected_train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]

    print 'Select Model...'
    start_time  = datetime.datetime.now()
    xgb_clf = xgb.XGBRegressor() 
    parameters = {'n_estimators': [120, 100, 140], 'max_depth':[3,5,7,9]}
    grid_search = GridSearchCV(estimator=xgb_clf, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters=grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    end_time = datetime.datetime.now()
    print 'Select Done..., Time Cost: %d' % ((end_time - start_time).seconds)
model_select.py 文件源码 项目:tpai_comp 作者: luuuyi 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def gbdt_select_model(file_name):
    train_df = read_from_file(file_name)
    #featrue 16
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby')
    train_np = selected_train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]

    print 'Select Model...'
    start_time  = datetime.datetime.now()
    gbdt = GradientBoostingRegressor() 
    parameters = {'n_estimators': [100, 120], 'max_depth':[4, 5, 6]}
    grid_search = GridSearchCV(estimator=gbdt, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters=grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    end_time = datetime.datetime.now()
    print 'Select Done..., Time Cost: %d' % ((end_time - start_time).seconds)
GBDT_solver.py 文件源码 项目:tpai_comp 作者: luuuyi 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def select_model(file_name):
    train_df = read_from_file(file_name)
    #featrue 16
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby')
    train_np = selected_train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]

    print 'Select Model...'
    start_time  = datetime.datetime.now()
    gbdt = GradientBoostingRegressor() 
    parameters = {'n_estimators': [10000, 12000], 'max_depth':[16,15, 14]}
    grid_search = GridSearchCV(estimator=gbdt, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters=grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    end_time = datetime.datetime.now()
    print 'Select Done..., Time Cost: %d' % ((end_time - start_time).seconds)
XGB_solver.py 文件源码 项目:tpai_comp 作者: luuuyi 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def xgb_model_select(train_file_name):  
    train_df = merge_features_to_use(train_file_name)
    train_df.drop(['conversionTime'], axis=1, inplace=True)
    print 'Train And Fix Missing App Count Value...'
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    print train_df.info()
    print train_df.describe()
    print train_df.isnull().sum()
    train_np = train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]

    print 'Select Model...'
    start_time  = datetime.datetime.now()
    xgb_clf = xgb.XGBRegressor() 
    parameters = {'n_estimators': [120, 100, 140], 'max_depth':[3,5,7,9], 'gamma':[0.1,0.3,0.5,0.7], 'min_child_weight':[1,3,5,7], }
    grid_search = GridSearchCV(estimator=xgb_clf, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters=grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    end_time = datetime.datetime.now()
    print 'Select Done..., Time Cost: %d' % ((end_time - start_time).seconds)
feature_engineering.py 文件源码 项目:LSAT 作者: BillVanderLugt 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def grid(X, y):
    '''
    Run an 8-fold grid search of the module-level ``pipeline`` over the
    module-level ``parameters`` and print timing, best score and best values.

    Adapted from: http://scikit-learn.org/stable/auto_examples/model_selection/grid_search_text_feature_extraction.html#sphx-glr-auto-examples-model-selection-grid-search-text-feature-extraction-py
    '''
    search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1, cv=8)

    step_names = [step_name for step_name, _ in pipeline.steps]
    print("Performing grid search...")
    print("pipeline:", step_names)
    print("parameters:")
    pprint(parameters)

    started = time()
    search.fit(X, y)
    print("done in %0.3fs" % (time() - started))
    print()

    print("Best score: %0.3f" % search.best_score_)
    print("Best parameters set:")
    tuned = search.best_estimator_.get_params()
    # Report only the parameters that were part of the search grid.
    for param_name in sorted(parameters):
        print("\t%s: %r" % (param_name, tuned[param_name]))
ensemble.py 文件源码 项目:eezzy 作者: 3Blades 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def fit(self, df, y, param_grid=None):
        """Fit the stacked model on the meta-features derived from ``df``.

        :param df: data frame holding both the features and the target column.
        :param y: name of the target column in ``df``.
        :param param_grid: optional grid; when given, a ``GridSearchCV`` over
            a fresh ``stacked_model_class`` instance picks the constructor
            arguments of the final stacked model.
        """
        from sklearn.model_selection import GridSearchCV
        X = df.drop(y, axis=1).values
        y = df[y].values

        meta_X = self.get_meta(X)

        # Without a grid the stacked model is built with its defaults.
        best_kwargs = {}
        if param_grid is not None:
            search = GridSearchCV(self.stacked_model_class(), param_grid)
            search.fit(meta_X, y)
            best_kwargs = search.best_params_

        self.stacked_model = self.stacked_model_class(**best_kwargs)
        self.stacked_model.fit(meta_X, y)
train.py 文件源码 项目:Emotion-Recognition 作者: HashCode55 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def grid_search_cv(clf, x, y, params, cv = 5):
    """
    :param clf: The classifier over which we want to perform 
    gridsearch.
    :param x: Features 
    :param y: Target
    :param params: Hyperparameters to perform gs on
    :cv: kfold cv parameter
    """
    gs = GridSearchCV(clf, param_grid = params, cv = cv)
    gs.fit(x, y)
    print 
    print 'BEST PARAMS:', gs.best_params_
    print 'BEST SCORE:', gs.best_score_
    print 
    best_estimator = gs.best_estimator_
    return best_estimator

######################
# PREPARING THE DATA #
######################

#get the last 4 images from each file
grid_search.py 文件源码 项目:Quora-Kaggle 作者: PPshrimpGo 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def LogisticRegression(X_train, y_train):
    """Grid-search a logistic regression over ``C`` and ``class_weight`` and print the results.

    The search uses 5-fold cross-validation scored with ``'neg_log_loss'``.
    Start and end timestamps, every grid score, the best score and the best
    value of each searched parameter are printed; nothing is returned.

    :param X_train: training features.
    :param y_train: training targets.
    """
    from sklearn.linear_model import LogisticRegression
    parameters = {
        'C':[0.6, 0.8, 1.0, 1.2],
        'class_weight':[None, 'balanced'],
    }

    LR = LogisticRegression()
    grid_search = GridSearchCV(estimator=LR, param_grid=parameters, cv=5, scoring='neg_log_loss',n_jobs=4)

    started = datetime.datetime.now()
    print ("logestic regression grid_search start in " + started.strftime('%Y-%m-%d %H:%M:%S'))

    grid_search.fit(X_train, y_train)
    # Take a fresh timestamp: reusing the start time would report the search
    # as finishing the instant it began.
    finished = datetime.datetime.now()
    print ("logestic regression grid_search done in " + finished.strftime('%Y-%m-%d %H:%M:%S'))

    # NOTE(review): ``grid_scores_`` only exists in older scikit-learn
    # releases; newer ones expose ``cv_results_`` instead - confirm the
    # pinned version.
    results = grid_search.grid_scores_
    for result in results:
        print(result)
    print("\nBest score: %0.3f\n" % grid_search.best_score_)
    print ("---------best parameters---------")
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print ("%s: %r" % (param_name, best_parameters[param_name]))
classification.py 文件源码 项目:crime_prediction 作者: livenb 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def build_grid_search(X, y):
    parameters = {
        "estimator__criterion": ['gini', 'entropy'],
        "estimator__max_depth": [10, 15, 20, 25, None],
        "estimator__max_features": ['auto', 'sqrt', 'log2', None]
    }
    ovr = OneVsRestClassifier(RandomForestClassifier(n_estimators=1000,
                                    oob_score=True, n_jobs=-1, verbose=1))
    model_tunning = GridSearchCV(ovr, param_grid=parameters, verbose=1,
                                 n_jobs=-1, cv=10,
                                 scoring=make_scorer(f1_score))
    model_tunning.fit(X, y)
    test_score = model_tunning.best_score_
    print 'The best test score: ', test_score
    y_score = model_tunning.predict_proba(X_test)
    multiclass_roc(y_score, 'grid_search_02')
    return model_tunning
tuner.py 文件源码 项目:xgboost-tuner 作者: cwerner87 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def clean_params_for_sk(params: dict) -> dict:
    """
    Return a copy of XGB parameters that is safe to use with scikit-learn's grid
    or randomized search estimators. The input dictionary is left untouched.

    :param params:
        A dictionary of XGB parameters.
    :return:
        A copy with ``nthread`` forced to 1 and ``num_class`` removed.
    """
    cleaned = params.copy()

    # nthread equal to the CPU count is right for xgb.cv, but it causes a hang
    # under GridSearchCV; parallelism should come from its n_jobs parameter.
    # See https://github.com/scikit-learn/scikit-learn/issues/6627 for details.
    cleaned['nthread'] = 1

    # XGBoost requires num_class for multiclass problems, but it is not a
    # parameter of interest to be tuned.
    cleaned.pop('num_class', None)

    return cleaned
models.py 文件源码 项目:Bacchus 作者: surfstudio 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def fit(self, X, *args, **kwargs):
        """Fit the wrapped model (optionally via a search) on a frame holding the target.

        :param X: data frame containing the feature columns and the
            ``self.target`` column.
        :param args: accepted but not used.
        :param kwargs: forwarded to the underlying ``fit`` call.
        :return: ``self``.
        """
        grid_cfg = self._grid_search
        random_cfg = self._random_search

        # A truthy search configuration wraps the model in that search.
        if grid_cfg:
            model = GridSearchCV(self._model, **grid_cfg)
        elif random_cfg:
            model = RandomizedSearchCV(self._model, **random_cfg)
        else:
            model = self._model

        # Keep a handle on whatever is fitted when a search was configured.
        if grid_cfg is not None:
            self._grid = model
        elif random_cfg is not None:
            self._rnd = model

        assert (self.target in X.columns.values), 'X must contain the target column'
        self._xcols = feature_cols = list(X.columns.values)
        feature_cols.remove(self.target)

        # With only an include-list given, derive the exclude-list from it.
        if len(self._columns_exclude) == 0 and len(self._columns_include) > 0:
            self._columns_exclude = list(set(feature_cols) - set(self._columns_include))
        for excluded in self._columns_exclude:
            feature_cols.remove(excluded)

        features = X[self._xcols]
        target_values = X[self.target]
        model.fit(features, target_values, **kwargs)
        return self
kgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def gs_numpy( method, X, Y, alphas_log = (-1, 1, 9), n_splits=5, n_jobs = -1, disp = True):
    """
    Grid-search ``alpha`` for a ``linear_model`` estimator on numpy arrays.

    method: name of the estimator class looked up on ``linear_model``.
    X, Y: feature and target arrays.
    alphas_log: arguments unpacked into ``np.logspace`` to build the alpha grid.
    n_splits: number of shuffled K-fold splits.
    n_jobs: parallel jobs for the search.
    disp: when true, print the shapes of X and Y first.

    Returns the fitted GridSearchCV object (scored with r2).
    """
    if disp:
        print( X.shape, Y.shape)

    estimator = getattr( linear_model, method)()
    alpha_grid = {'alpha': np.logspace( *alphas_log)}
    folds = model_selection.KFold( n_splits = n_splits, shuffle=True)
    search = model_selection.GridSearchCV(
        estimator, alpha_grid, scoring = 'r2', cv = folds, n_jobs = n_jobs)

    search.fit( X, Y)
    return search
kgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def gs_classfier( classifier, xM, yVc, params, n_splits=5, n_jobs=-1):
    """
    gs = gs_classfier( classifier, xM, yVc, params, n_splits=5, n_jobs=-1)

    Grid-search ``params`` for ``classifier`` with shuffled K-fold
    cross-validation and return the fitted search object.

    Inputs
    ======
    classifier = svm.SVC(), for example

    param = {"C": np.logspace(-2,2,5)}
    """
    folds = model_selection.KFold( n_splits=n_splits, shuffle=True)
    search = model_selection.GridSearchCV(
        classifier, params, cv=folds, n_jobs=n_jobs)
    search.fit( xM, yVc)
    return search
kgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def gs_Ridge_BIKE( A_list, yV, XX = None, alphas_log = (1, -1, 9), n_splits = 5, n_jobs = -1):
    """
    Grid-search ``alpha`` for a BIKE_Ridge model and return the fitted search.

    A_list is a list of A matrices, where A is a similarity matrix.
    XX is the concatenated linear descriptors; it may be left as None.
    alphas_log is unpacked into ``np.logspace`` to build the alpha grid.
    """
    model = binary_model.BIKE_Ridge( A_list, XX)
    alpha_grid = {'alpha': np.logspace( *alphas_log)}
    n_rows = A_list[0].shape[0] # number of molecules, per the original author

    folds = model_selection.KFold( n_splits = n_splits, shuffle=True)
    search = model_selection.GridSearchCV(
        model, alpha_grid, scoring = 'r2', cv = folds, n_jobs = n_jobs)

    # The search is fitted on a column vector of row indices 0..n_rows-1
    # rather than on the matrices themselves.
    row_index_column = np.array([list(range( n_rows))]).T
    search.fit( row_index_column, yV)
    return search
kgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def gs_BIKE_Ridge( A_list, yV, alphas_log = (1, -1, 9), X_concat = None, n_splits = 5, n_jobs = -1):
    """
    Grid-search ``alpha`` for a BIKE_Ridge model and return the fitted search.

    A_list is a list of A matrices, where A is a similarity matrix.
    X_concat is the concatenated linear descriptors; it may be left as None.
    alphas_log is unpacked into ``np.logspace`` to build the alpha grid.
    """
    model = binary_model.BIKE_Ridge( A_list, X_concat)
    alpha_grid = {'alpha': np.logspace( *alphas_log)}
    n_rows = A_list[0].shape[0] # number of molecules, per the original author

    folds = model_selection.KFold( n_splits = n_splits, shuffle=True)
    search = model_selection.GridSearchCV(
        model, alpha_grid, scoring = 'r2', cv = folds, n_jobs = n_jobs)

    # The search is fitted on a column vector of row indices 0..n_rows-1
    # rather than on the matrices themselves.
    row_index_column = np.array([list(range( n_rows))]).T
    search.fit( row_index_column, yV)
    return search
kgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def gs_numpy( method, X, Y, alphas_log = (-1, 1, 9), n_splits=5, n_jobs = -1, disp = True):
    """
    Grid-search ``alpha`` for a ``linear_model`` estimator on numpy arrays.

    method: name of the estimator class looked up on ``linear_model``.
    X, Y: feature and target arrays.
    alphas_log: arguments unpacked into ``np.logspace`` to build the alpha grid.
    n_splits: number of shuffled K-fold splits.
    n_jobs: parallel jobs for the search.
    disp: when true, print the shapes of X and Y first.

    Returns the fitted GridSearchCV object (scored with r2).
    """
    if disp:
        print( X.shape, Y.shape)

    estimator = getattr( linear_model, method)()
    alpha_grid = {'alpha': np.logspace( *alphas_log)}
    folds = model_selection.KFold( n_splits = n_splits, shuffle=True)
    search = model_selection.GridSearchCV(
        estimator, alpha_grid, scoring = 'r2', cv = folds, n_jobs = n_jobs)

    search.fit( X, Y)
    return search
kgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def gs_param( model, X, y, param_grid, n_splits=5, shuffle=True, n_jobs=-1, graph=False):
    """
    gs = gs_param( model, X, y, param_grid, n_splits=5, shuffle=True, n_jobs=-1)

    Grid-search ``param_grid`` for ``model`` with K-fold cross-validation and
    return the fitted search object. With ``graph`` set, the mean train and
    test scores of every candidate are also plotted.

    Inputs
    ======
    model = svm.SVC(), or linear_model.LinearRegression(), for example
    param = {"C": np.logspace(-2,2,5)}
    """
    folds = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    search = model_selection.GridSearchCV(
        model, param_grid, cv=folds, n_jobs=n_jobs)
    search.fit( X, y)

    if graph:
        scores = search.cv_results_
        plt.plot( scores["mean_train_score"], label='E[Train]')
        plt.plot( scores["mean_test_score"], label='E[Test]')
        plt.legend(loc=0)
        plt.grid()

    return search


问题


面经


文章

微信
公众号

扫码关注公众号