python类make_scorer()的实例源码

model_base.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def greedy_select_features(self):
    """Choose a feature subset by greedy forward selection and apply it.

    Columns of ``self.train_`` and ``self.test_`` are renamed to their
    positional index as strings (``'0'``, ``'1'``, ...).  Unless
    ``self.debug_`` is truthy, a previously stored result is requested with
    ``self.load('chosen_features')``; when that yields ``None`` the search
    is run and its outcome stored with ``self.save``.

    The search adds one column per round: every column not yet chosen is
    tried together with the current set, scored with the mean of
    ``cross_val_score`` on a ``BayesianRidge`` model, and the candidate
    with the lowest score is kept.  The best-scoring set seen over all
    rounds is remembered, and the search stops early once the current set
    has grown more than 15 columns beyond that best set.

    Side effects: ``self.train_`` and ``self.test_`` are replaced by their
    restriction to the selected columns.  Returns ``None``.
    """
    saved = None if self.debug_ else self.load('chosen_features')

    # The selected features are stored as positional string names, so the
    # frames must carry those names on BOTH paths.  Renaming only when the
    # search runs made the cached path fail on the final column lookup.
    col_names = [str(c) for c in range(self.train_.shape[1])]
    self.train_.columns = col_names
    self.test_.columns = col_names

    if saved is None:
        print('initial shapes:', self.train_.shape, self.test_.shape)

        g_best_score = 1e9
        g_best_features = None

        y = self.y_.ravel()
        current = set()
        # NOTE(review): make_scorer is used with its defaults, so the scorer
        # yields the raw log loss and the loop below minimises it.
        scorer = metrics.make_scorer(metrics.log_loss)
        for _ in col_names:
            best_score = 1e9
            best_features = None
            # Walk candidates in column order (not set order) so ties and
            # the resulting column order are reproducible between runs.
            for f in col_names:
                if f in current:
                    continue
                newf = [c for c in col_names if c == f or c in current]
                cv = model_selection.cross_val_score(
                    linear_model.BayesianRidge(),
                    self.train_[newf], y,
                    cv=self.n_fold_, n_jobs=-2,
                    scoring=scorer)
                score = np.mean(cv)
                if best_score > score:
                    best_score = score
                    best_features = newf
            current = set(best_features)
            if g_best_score > best_score:
                g_best_score = best_score
                g_best_features = best_features
                print('new best:', g_best_score, g_best_features, self.now())
            # Give up once 15+ extra columns have failed to beat the best.
            if len(best_features) - len(g_best_features) > 15:
                break
        self.save('chosen_features', (g_best_features, None))
    else:
        # Stored value is a pair; only the feature list is used here.
        g_best_features, _ = saved

    print('feature selection complete.', self.now())
    self.train_ = self.train_[g_best_features]
    self.test_ = self.test_[g_best_features]
model_base_2.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def greedy_select_features(self):
    """Resumable greedy forward feature selection, applied in place.

    Unless ``self.debug_`` is truthy, earlier progress is requested with
    ``self.load('chosen_features')`` as a triple
    ``(best_features, best_score, finished)``.  Columns of ``self.train_``
    and ``self.test_`` are renamed to their positional index as strings.

    While not finished, each round tries every column that is not in the
    current set, scores the enlarged set with ``self.ccv`` on a
    ``BayesianRidge`` model (lower is treated as better), and keeps the
    best candidate.  Progress is stored after every round with
    ``finished=False`` and once more with ``finished=True`` at the end.
    The search stops early when the current set has grown more than 5
    columns beyond the best-scoring set.

    Side effects: ``self.train_`` and ``self.test_`` are replaced by their
    restriction to the selected columns.  Returns ``None``.
    """
    print('initial shapes:', self.train_.shape, self.test_.shape)
    saved = None if self.debug_ else self.load('chosen_features')

    # Identity test: `== None` is evaluated element-wise by array-like
    # objects and then fails when used as a condition.
    if saved is None:
        g_best_score = 1e9
        g_best_features = []
        current = set()
        finished = False
    else:
        g_best_features, g_best_score, finished = saved
        current = set(g_best_features)
        print('SFS REUSE:', g_best_score, g_best_features, self.now())

    col_names = [str(c) for c in range(self.train_.shape[1])]
    self.train_.columns = col_names
    self.test_.columns = col_names

    if not finished:
        y = self.y_.ravel()
        # NOTE(review): make_scorer is used with its defaults, so the scorer
        # yields the raw log loss; the loop below minimises the score.
        scorer = metrics.make_scorer(metrics.log_loss)
        loop_count = len(col_names) - len(g_best_features)
        for _ in range(loop_count):
            best_score = 1e9
            best_features = None
            # Walk candidates in column order (not set order) so ties and
            # the stored feature order are reproducible between runs.
            for f in col_names:
                if f in current:
                    continue
                newf = [c for c in col_names if c == f or c in current]
                # Second element of the ccv result is not needed here.
                score, _unused = self.ccv(linear_model.BayesianRidge(),
                                          self.train_[newf], y, scorer)
                if best_score > score:
                    best_score = score
                    best_features = newf
            current = set(best_features)
            if g_best_score > best_score:
                g_best_score = best_score
                g_best_features = best_features
                print('new best:', g_best_score, g_best_features, self.now())
            # Give up once 5+ extra columns have failed to beat the best.
            if len(best_features) - len(g_best_features) > 5:
                break
            # Checkpoint so an interrupted run can be resumed.
            self.save('chosen_features', (g_best_features, g_best_score, False))
        # Mark the search as complete so later runs skip it entirely.
        self.save('chosen_features', (g_best_features, g_best_score, True))

    print('feature selection complete.', self.now())
    self.train_ = self.train_[g_best_features]
    self.test_ = self.test_[g_best_features]
test_validation.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def test_permutation_score():
    """Exercise permutation_test_score on iris with a linear SVC.

    Covers: plain accuracy scoring, passing constant labels, sparse input,
    a scorer built with make_scorer, and a target unrelated to the data.
    """
    dataset = load_iris()
    features = dataset.data
    features_sparse = coo_matrix(features)
    target = dataset.target
    classifier = SVC(kernel='linear')
    folds = StratifiedKFold(2)

    # Baseline: accuracy scoring on the dense data.
    base_score, _, base_pvalue = permutation_test_score(
        classifier, features, target,
        n_permutations=30, cv=folds, scoring="accuracy")
    assert_greater(base_score, 0.9)
    assert_almost_equal(base_pvalue, 0.0, 1)

    # A single constant label per sample must reproduce the baseline.
    constant_labels = np.ones(target.size)
    labelled_score, _, labelled_pvalue = permutation_test_score(
        classifier, features, target,
        n_permutations=30, cv=folds, scoring="accuracy",
        labels=constant_labels, random_state=0)
    assert_true(labelled_score == base_score)
    assert_true(labelled_pvalue == base_pvalue)

    # Sparse representation must give the same results as the dense one.
    sparse_classifier = SVC(kernel='linear')
    sparse_folds = StratifiedKFold(2)
    labelled_score, _, labelled_pvalue = permutation_test_score(
        sparse_classifier, features_sparse, target,
        n_permutations=30, cv=sparse_folds, scoring="accuracy",
        labels=np.ones(target.size), random_state=0)

    assert_true(labelled_score == base_score)
    assert_true(labelled_pvalue == base_pvalue)

    # Custom scoring object: (hits - misses) / number of samples.
    def custom_score(y_true, y_pred):
        hits = (y_true == y_pred).sum()
        misses = (y_true != y_pred).sum()
        return (hits - misses) / y_true.shape[0]

    custom_scorer = make_scorer(custom_score)
    custom_value, _, custom_pvalue = permutation_test_score(
        classifier, features, target,
        n_permutations=100, scoring=custom_scorer, cv=folds, random_state=0)
    assert_almost_equal(custom_value, .93, 2)
    assert_almost_equal(custom_pvalue, 0.01, 3)

    # Replace the target with a cyclic 0/1/2 pattern unrelated to the data.
    shuffled_target = np.mod(np.arange(len(target)), 3)

    noise_score, _, noise_pvalue = permutation_test_score(
        classifier, features, shuffled_target,
        n_permutations=30, cv=folds, scoring="accuracy")

    assert_less(noise_score, 0.5)
    assert_greater(noise_pvalue, 0.2)
test_cross_validation.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def test_permutation_score():
    """Exercise cval.permutation_test_score on iris with a linear SVC.

    Covers: plain accuracy scoring, passing constant labels, sparse input,
    a scorer built with make_scorer, and a target unrelated to the data.
    """
    dataset = load_iris()
    features = dataset.data
    features_sparse = coo_matrix(features)
    target = dataset.target
    classifier = SVC(kernel='linear')
    folds = cval.StratifiedKFold(target, 2)

    # Baseline: accuracy scoring on the dense data.
    base_score, _, base_pvalue = cval.permutation_test_score(
        classifier, features, target,
        n_permutations=30, cv=folds, scoring="accuracy")
    assert_greater(base_score, 0.9)
    assert_almost_equal(base_pvalue, 0.0, 1)

    # A single constant label per sample must reproduce the baseline.
    constant_labels = np.ones(target.size)
    labelled_score, _, labelled_pvalue = cval.permutation_test_score(
        classifier, features, target,
        n_permutations=30, cv=folds, scoring="accuracy",
        labels=constant_labels, random_state=0)
    assert_true(labelled_score == base_score)
    assert_true(labelled_pvalue == base_pvalue)

    # Sparse representation must give the same results as the dense one.
    sparse_classifier = SVC(kernel='linear')
    sparse_folds = cval.StratifiedKFold(target, 2)
    labelled_score, _, labelled_pvalue = cval.permutation_test_score(
        sparse_classifier, features_sparse, target,
        n_permutations=30, cv=sparse_folds, scoring="accuracy",
        labels=np.ones(target.size), random_state=0)

    assert_true(labelled_score == base_score)
    assert_true(labelled_pvalue == base_pvalue)

    # Custom scoring object: (hits - misses) / number of samples.
    def custom_score(y_true, y_pred):
        hits = (y_true == y_pred).sum()
        misses = (y_true != y_pred).sum()
        return (hits - misses) / y_true.shape[0]

    custom_scorer = make_scorer(custom_score)
    custom_value, _, custom_pvalue = cval.permutation_test_score(
        classifier, features, target,
        n_permutations=100, scoring=custom_scorer, cv=folds, random_state=0)
    assert_almost_equal(custom_value, .93, 2)
    assert_almost_equal(custom_pvalue, 0.01, 3)

    # Replace the target with a cyclic 0/1/2 pattern unrelated to the data.
    shuffled_target = np.mod(np.arange(len(target)), 3)

    noise_score, _, noise_pvalue = cval.permutation_test_score(
        classifier, features, shuffled_target,
        n_permutations=30, cv=folds, scoring="accuracy")

    assert_less(noise_score, 0.5)
    assert_greater(noise_pvalue, 0.2)
test_rfe.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_rfecv():
    """Check RFECV on iris padded with six random noise columns.

    Each scenario fits RFECV around a linear SVC with 5-fold CV and
    verifies that transforming the data gives back exactly the original
    iris columns.
    """
    rng = check_random_state(0)
    iris = load_iris()
    noise = rng.normal(size=(len(iris.data), 6))
    X = np.c_[iris.data, noise]
    y = list(iris.target)   # regression test: list should be supported

    def build(step, **extra):
        # Every scenario uses the same estimator and fold count.
        return RFECV(estimator=SVC(kernel="linear"), step=step, cv=5, **extra)

    # Default scoring (the estimator's own score function).
    selector = build(1)
    selector.fit(X, y)
    # non-regression test for missing worst feature:
    n_features = X.shape[1]
    assert_equal(len(selector.grid_scores_), n_features)
    assert_equal(len(selector.ranking_), n_features)
    reduced = selector.transform(X)

    # All the noisy variables were filtered out
    assert_array_equal(reduced, iris.data)

    # Same check on a sparse matrix.
    sparse_selector = build(1)
    X_sparse = sparse.csr_matrix(X)
    sparse_selector.fit(X_sparse, y)
    reduced_sparse = sparse_selector.transform(X_sparse)
    assert_array_equal(reduced_sparse.toarray(), iris.data)

    # Customized loss function wrapped as a scorer.
    loss_scorer = make_scorer(zero_one_loss, greater_is_better=False)
    selector = build(1, scoring=loss_scorer)
    ignore_warnings(selector.fit)(X, y)
    reduced = selector.transform(X)
    assert_array_equal(reduced, iris.data)

    # Scorer object looked up by name.
    accuracy_scorer = get_scorer('accuracy')
    selector = build(1, scoring=accuracy_scorer)
    selector.fit(X, y)
    reduced = selector.transform(X)
    assert_array_equal(reduced, iris.data)

    # Test fix on grid_scores: a constant scorer must give all-ones scores.
    def constant_scorer(estimator, X, y):
        return 1.0

    selector = build(1, scoring=constant_scorer)
    selector.fit(X, y)
    expected_scores = np.ones(len(selector.grid_scores_))
    assert_array_equal(selector.grid_scores_, expected_scores)

    # Same as the first two scenarios, but with step=2
    selector = build(2)
    selector.fit(X, y)
    assert_equal(len(selector.grid_scores_), 6)
    assert_equal(len(selector.ranking_), X.shape[1])
    reduced = selector.transform(X)
    assert_array_equal(reduced, iris.data)

    sparse_selector = build(2)
    X_sparse = sparse.csr_matrix(X)
    sparse_selector.fit(X_sparse, y)
    reduced_sparse = sparse_selector.transform(X_sparse)
    assert_array_equal(reduced_sparse.toarray(), iris.data)


问题


面经


文章

微信
公众号

扫码关注公众号