Python GridSearchCV() usage examples (source code)

kgrid.py source code (project: jamespy_py3, author: jskDr)
def gs_Ridge_BIKE( A_list, yV, XX = None, alphas_log = (1, -1, 9), n_splits = 5, n_jobs = -1):
    """
    A_list is a list of A matrices, where each A is a similarity matrix.
    XX holds the concatenated linear descriptors; if no linear descriptors
    are used, XX can be left as None.
    """

    clf = binary_model.BIKE_Ridge( A_list, XX)
    parmas = {'alpha': np.logspace( *alphas_log)}
    ln = A_list[0].shape[0]  # ln is the number of molecules.

    kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
    #kf_n = kf5_ext_c.split( A_list[0])
    gs = model_selection.GridSearchCV( clf, parmas, scoring = 'r2', cv = kf_n_c, n_jobs = n_jobs)

    AX_idx = np.array([list(range( ln))]).T
    gs.fit( AX_idx, yV)

    return gs
jgrid.py source code (project: jamespy_py3, author: jskDr)
def gs_Lasso(xM, yV, alphas_log=(-1, 1, 9), n_folds=5, n_jobs=-1):

    print(xM.shape, yV.shape)

    clf = linear_model.Lasso()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    parmas = {'alpha': np.logspace(*alphas_log)}
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)

    gs = model_selection.GridSearchCV(
        clf, parmas, scoring='r2', cv=kf5, n_jobs=n_jobs)

    gs.fit(xM, yV)

    return gs
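The jgrid helpers here all follow the same pattern: build a log-spaced alpha grid, create a shuffled KFold splitter, and hand both to GridSearchCV with r2 scoring. For reference, a minimal self-contained sketch of that pattern, with synthetic data standing in for xM and yV (purely illustrative):

import numpy as np
from sklearn import linear_model, model_selection
from sklearn.datasets import make_regression

# Synthetic stand-ins for xM / yV (illustrative only).
xM, yV = make_regression(n_samples=100, n_features=20, noise=0.5, random_state=0)

params = {'alpha': np.logspace(-1, 1, 9)}  # log-spaced alpha grid
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=0)
gs = model_selection.GridSearchCV(linear_model.Lasso(), params,
                                  scoring='r2', cv=cv, n_jobs=-1)
gs.fit(xM, yV)
print(gs.best_params_, gs.best_score_)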
jgrid.py source code (project: jamespy_py3, author: jskDr)
def _gs_SVC_r0(xM, yVc, params):
    """
    Since this is a classification task, we use yVc, which contains discrete
    class labels, whereas yV may contain floating-point values.
    """

    print(xM.shape, yVc.shape)

    clf = svm.SVC()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    kf5_c = model_selection.KFold(n_splits=5, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(clf, params, cv=kf5, n_jobs=-1)

    gs.fit(xM, yVc)

    return gs
jgrid.py source code (project: jamespy_py3, author: jskDr)
def gs_SVC(xM, yVc, params, n_folds=5):
    """
    Since this is a classification task, we use yVc, which contains discrete
    class labels, whereas yV may contain floating-point values.
    """

    print(xM.shape, yVc.shape)

    clf = svm.SVC()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(clf, params, cv=kf5, n_jobs=-1)

    gs.fit(xM, yVc)

    return gs
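In both SVC helpers the params grid is supplied by the caller. As a hypothetical illustration (the parameter values below are assumptions, not the project's grid), a search over C and gamma on synthetic data would look like this:

from sklearn import svm, model_selection
from sklearn.datasets import make_classification

# Synthetic stand-ins for xM / yVc (illustrative only).
xM, yVc = make_classification(n_samples=200, n_features=20, random_state=0)

params = {'C': [0.1, 1, 10], 'gamma': ['scale', 0.01, 0.001]}
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=0)
gs = model_selection.GridSearchCV(svm.SVC(), params, cv=cv, n_jobs=-1)
gs.fit(xM, yVc)
print(gs.best_params_)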
jgrid.py source code (project: jamespy_py3, author: jskDr)
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print('If scoring is an error metric rather than r2, the reported score is sign-reversed for scoring!')
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    parmas = {'alpha': np.logspace(*alphas_log)}
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    gs = model_selection.GridSearchCV(
        clf, parmas, scoring=scoring, cv=kf_n, n_jobs=n_jobs)

    gs.fit(xM, yV)

    return gs
jgrid.py source code (project: jamespy_py3, author: jskDr)
def gs_Ridge_BIKE(A_list, yV, XX=None, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1):
    """
    A_list is a list of A matrices, where each A is a similarity matrix.
    XX holds the concatenated linear descriptors; if no linear descriptors
    are used, XX can be left as None.
    """

    clf = binary_model.BIKE_Ridge(A_list, XX)
    parmas = {'alpha': np.logspace(*alphas_log)}
    ln = A_list[0].shape[0]  # ln is the number of molecules.

    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    # Pass the KFold object itself so the splits are generated over the
    # AX_idx index array given to fit(), not over the short A_list.
    gs = model_selection.GridSearchCV(
        clf, parmas, scoring='r2', cv=kf_n_c, n_jobs=n_jobs)

    AX_idx = np.array([list(range(ln))]).T
    gs.fit(AX_idx, yV)

    return gs
train_novelty_detection.py source code (project: keras-transfer-learning-for-oxford102, author: Arsey)
def train_logistic():
    df = pd.read_csv(config.activations_path)
    df, y, classes = encode(df)

    X_train, X_test, y_train, y_test = train_test_split(df.values, y, test_size=0.2, random_state=17)

    params = {'C': [10, 2, .9, .4, .1], 'tol': [0.0001, 0.001, 0.0005]}
    log_reg = LogisticRegression(solver='lbfgs', multi_class='multinomial', class_weight='balanced')
    clf = GridSearchCV(log_reg, params, scoring='neg_log_loss', refit=True, cv=3, n_jobs=-1)
    clf.fit(X_train, y_train)

    print("best params: " + str(clf.best_params_))
    print("Accuracy: ", accuracy_score(y_test, clf.predict(X_test)))

    setattr(clf, '__classes', classes)
    # save the fitted model for later use
    joblib.dump(clf, config.get_novelty_detection_model_path())
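The function above depends on the project's config module and pre-computed activation features. A self-contained sketch of the same neg_log_loss search, with synthetic data standing in for the encoded activations (purely illustrative), might look like this:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split

# Synthetic stand-in for the activation features (illustrative only).
X, y = make_classification(n_samples=300, n_features=50, n_informative=10,
                           n_classes=3, random_state=17)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=17)

params = {'C': [10, 2, .9, .4, .1], 'tol': [0.0001, 0.001, 0.0005]}
log_reg = LogisticRegression(solver='lbfgs', class_weight='balanced', max_iter=1000)
clf = GridSearchCV(log_reg, params, scoring='neg_log_loss', refit=True, cv=3, n_jobs=-1)
clf.fit(X_train, y_train)
print(clf.best_params_, accuracy_score(y_test, clf.predict(X_test)))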
train.py source code (project: jarvis, author: whittlbc)
def perform():
    # Create a new grid search classifier from a sci-kit pipeline
    model = GridSearchCV(pipeline(), gs_clf_params(), n_jobs=-1)

    # Get your training and testing sets of data with 50/50 split
    (train_data, train_targets), (test_data, test_targets) = dp.get_data()

    # Train your model
    model = model.fit(train_data, train_targets)

    # Test its accuracy
    predictions = model.predict(test_data)

    # Display the model's accuracy
    print "\nModel Accuracy: {}\n".format(np.mean(predictions == test_targets))

    # Save the trained model to disk
    save_model(model)
test_weight_boosting.py source code (project: Parallel-SGD, author: angadgill)
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
test_search.py source code (project: Parallel-SGD, author: angadgill)
def test_grid_search():
    # Test that the best estimator contains the right value for foo_param
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, verbose=3)
    # make sure it selects the smallest parameter in case of ties
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    grid_search.fit(X, y)
    sys.stdout = old_stdout
    assert_equal(grid_search.best_estimator_.foo_param, 2)

    for i, foo_i in enumerate([1, 2, 3]):
        assert_true(grid_search.grid_scores_[i][0]
                    == {'foo_param': foo_i})
    # Smoke test the score etc:
    grid_search.score(X, y)
    grid_search.predict_proba(X)
    grid_search.decision_function(X)
    grid_search.transform(X)

    # Test exception handling on scoring
    grid_search.scoring = 'sklearn'
    assert_raises(ValueError, grid_search.fit, X, y)
test_search.py source code (project: Parallel-SGD, author: angadgill)
def test_grid_search_labels():
    # Check if ValueError (when labels is None) propagates to GridSearchCV
    # And also check if labels is correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    labels = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    label_cvs = [LeaveOneLabelOut(), LeavePLabelOut(2), LabelKFold(),
                 LabelShuffleSplit()]
    for cv in label_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        assert_raise_message(ValueError,
                             "The labels parameter should not be None",
                             gs.fit, X, y)
        gs.fit(X, y, labels)

    non_label_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_label_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)
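This test targets an older scikit-learn API; in current releases the label-based splitters were renamed to their group-based counterparts (LeaveOneGroupOut, GroupKFold, GroupShuffleSplit) and the third argument to fit is the groups keyword. A minimal sketch of the equivalent call on a current release (the group assignment below is an illustrative assumption):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, GroupKFold
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
groups = np.arange(15) % 3  # three groups, assigned round-robin for illustration

# Group-aware CV: the groups array is passed to fit() and forwarded to the splitter.
gs = GridSearchCV(LinearSVC(), {'C': [1]}, cv=GroupKFold(n_splits=3))
gs.fit(X, y, groups=groups)
print(gs.best_params_)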
test_search.py source code (project: Parallel-SGD, author: angadgill)
def test_grid_search_sparse():
    # Test that grid search works with both dense and sparse matrices
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C

    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180].tocoo(), y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert_true(np.mean(y_pred == y_pred2) >= .9)
    assert_equal(C, C2)
test_search.py source code (project: Parallel-SGD, author: angadgill)
def test_pandas_input():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((DataFrame, Series))
    except ImportError:
        pass

    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    for InputFeatureType, TargetType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)

        grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]})
        grid_search.fit(X_df, y_ser).score(X_df, y_ser)
        grid_search.predict(X_df)
        assert_true(hasattr(grid_search, "grid_scores_"))
test_ridge.py source code (project: Parallel-SGD, author: angadgill)
def test_ridgecv_sample_weight():
    rng = np.random.RandomState(0)
    alphas = (0.1, 1.0, 10.0)

    # There are different algorithms for n_samples > n_features
    # and the opposite, so test them both.
    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        cv = KFold(5)
        ridgecv = RidgeCV(alphas=alphas, cv=cv)
        ridgecv.fit(X, y, sample_weight=sample_weight)

        # Check using GridSearchCV directly
        parameters = {'alpha': alphas}
        fit_params = {'sample_weight': sample_weight}
        gs = GridSearchCV(Ridge(), parameters, fit_params=fit_params,
                          cv=cv)
        gs.fit(X, y)

        assert_equal(ridgecv.alpha_, gs.best_estimator_.alpha)
        assert_array_almost_equal(ridgecv.coef_, gs.best_estimator_.coef_)
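Note that the fit_params constructor argument used above was removed in later scikit-learn releases; fit parameters such as sample_weight are now passed to fit() directly. A minimal sketch of the equivalent call on a current release:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, KFold

rng = np.random.RandomState(0)
X, y = rng.randn(20, 5), rng.randn(20)
sample_weight = 1.0 + rng.rand(20)

gs = GridSearchCV(Ridge(), {'alpha': (0.1, 1.0, 10.0)}, cv=KFold(5))
# sample_weight is routed through fit() instead of a fit_params argument.
gs.fit(X, y, sample_weight=sample_weight)
print(gs.best_estimator_.alpha)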
predictor.py source code (project: auto_ml, author: doordash)
def print_training_summary(self, gs):
        print('The best CV score from GridSearchCV (by default averaging across k-fold CV) for ' + self.output_column + ' is:')
        if self.took_log_of_y:
            print('    Note that this score is calculated using the natural logs of the y values.')
        print(gs.best_score_)
        print('The best params were')

        # Remove 'final_model__model' from what we print: it's redundant with the model name, and a raw Python object is hard to read quickly in a list.
        if 'model' in gs.best_params_:
            printing_copy = {}
            for k, v in gs.best_params_.items():
                if k != 'model':
                    printing_copy[k] = v
                else:
                    printing_copy[k] = utils_models.get_name_from_model(v)
        else:
            printing_copy = gs.best_params_

        print(printing_copy)

        if self.verbose:
            print('Here are all the hyperparameters that were tried:')
            raw_scores = gs.grid_scores_
            sorted_scores = sorted(raw_scores, key=lambda x: x[1], reverse=True)
            for score in sorted_scores:
                for k, v in score[0].items():
                    if k == 'model':
                        score[0][k] = utils_models.get_name_from_model(v)
                print(score)
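The verbose branch above relies on grid_scores_, which later scikit-learn releases removed. With a current release the same per-candidate listing can be rebuilt from cv_results_; a minimal self-contained sketch (estimator and grid chosen only for illustration):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=100, random_state=0)
gs = GridSearchCV(LogisticRegression(max_iter=1000), {'C': [0.1, 1.0, 10.0]}, cv=3)
gs.fit(X, y)

# cv_results_ replaces grid_scores_: list candidates sorted by mean test score.
results = gs.cv_results_
for i in np.argsort(results['mean_test_score'])[::-1]:
    print(results['params'][i], results['mean_test_score'][i])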
test_model_assessment.py source code (project: palladio, author: slipguru)
def test_model_assessment():
    X, y = make_classification(n_samples=40, n_features=100, n_informative=2,
                               n_classes=2, n_redundant=0)
    pipe = Pipeline([('enet', ElasticNetFeatureSelection()),
                     ('ridge', RidgeClassifier())])

    ma = ModelAssessment(GridSearchCV(pipe, {'enet__l1_ratio': [2]})).fit(X, y)
    assert len(ma.cv_results_) == 0
plotting.py source code (project: palladio, author: slipguru)
def _get_best_params(obj):
    # if obj is a ModelAssessment, then get the first GridSearch
    if isinstance(obj, ModelAssessment):
        obj = pd.DataFrame(obj.cv_results_).sort_values(
            'test_score', ascending=False).iloc[0].estimator
    elif not isinstance(obj, GridSearchCV):
        raise NotImplementedError("This can only work with a ModelAssessment "
                                  "or GridSearchCV object. You passed "
                                  "a %s object" % obj.__class__.__name__)

    return obj.best_params_
_pipeline.py source code (project: palladio, author: slipguru)
def cv_results_(self):
        """Get GridSearchCV results."""
        check_is_fitted(self, 'gs_')
        return self.gs_.cv_results_
_pipeline.py source code (project: palladio, author: slipguru)
def best_params_(self):
        """Get GridSearchCV best_params."""
        check_is_fitted(self, 'gs_')
        return self.gs_.best_params_
rf_train.py source code (project: trend_ml_toolkit_xgboost, author: raymon-tian)
def tune_n_estimators_cv(estimator, params, X_train, Y_train):
    grid_search = GridSearchCV(estimator, param_grid=params, scoring='roc_auc', n_jobs=-1, cv=10, verbose=10)
    grid_search.fit(X_train, Y_train)
    return grid_search.best_params_
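A hypothetical call of this helper (the project itself tunes an XGBoost model, so the random forest and grid below are only illustrative assumptions, and tune_n_estimators_cv with its GridSearchCV import is assumed to be in scope):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Stand-in data and estimator for illustration only.
X_train, Y_train = make_classification(n_samples=300, n_features=20, random_state=0)
best = tune_n_estimators_cv(RandomForestClassifier(random_state=0),
                            {'n_estimators': [50, 100, 200]},
                            X_train, Y_train)
print(best)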

