python类cross_val_predict()的实例源码-面圈网

_jgrid_r0.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def _cv_r0( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred

model.py 文件源码项目：wende 作者: h404bi 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def test_model(self, n_folds=10):
        """ ?? `??K-??????Stratified K-folds cross-validating?`
            ???????
        """
        logging.debug("testing model with {}-folds CV".format(n_folds))
        model = self.init_model()
        X = self.data.data
        y = self.data.target

        cv = cross_validation.StratifiedKFold(y, n_folds=n_folds, random_state=42)

        t0 = time()
        y_pred = cross_validation.cross_val_predict(model, X=X, y=y, n_jobs=-1, cv=cv)
        t = time() - t0
        print("=" * 52)
        print("time cost: {}".format(t))
        print()
        print("confusion matrix\n", metrics.confusion_matrix(y, y_pred))
        print()
        print("\t\taccuracy: {}".format(metrics.accuracy_score(y, y_pred)))
        print()
        print("\t\tclassification report")
        print("-" * 52)
        print(metrics.classification_report(y, y_pred))

TermDocMatrix.py 文件源码项目：scattertext 作者: JasonKessler 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def get_logistic_regression_coefs_l2(self, category,
                                         clf=RidgeClassifierCV()):
        ''' Computes l2-penalized logistic regression score.
        Parameters
        ----------
        category : str
            category name to score

        category : str
            category name to score
        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        from sklearn.cross_validation import cross_val_predict
        y = self._get_mask_from_category(category)
        X = TfidfTransformer().fit_transform(self._X)
        clf.fit(X, y)
        y_hat = cross_val_predict(clf, X, y)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        return clf.coef_[0], acc, baseline

TermDocMatrix.py 文件源码项目：scattertext 作者: JasonKessler 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def get_logistic_regression_coefs_l1(self, category,
                                         clf=LassoCV(alphas=[0.1, 0.001],
                                                     max_iter=10000,
                                                     n_jobs=-1)):
        ''' Computes l1-penalized logistic regression score.
        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        from sklearn.cross_validation import cross_val_predict
        y = self._get_mask_from_category(category)
        y_continuous = self._get_continuous_version_boolean_y(y)
        # X = TfidfTransformer().fit_transform(self._X)
        X = self._X

        clf.fit(X, y_continuous)
        y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        clf.fit(X, y_continuous)
        return clf.coef_, acc, baseline

linear_model.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def fit(self, xy_file, fname_out):
        """
        All grid results will be saved later,
        although only the best result is saved.
        """

        df = read_csv( xy_file)
        X = df['X'].values
        y = df['y'].values

        super().fit( X, y)

        yp = cross_validation.cross_val_predict( self.best_estimator_, X, y)

        m_idx = pd.MultiIndex.from_product([['yp'], df['y'].columns])
        yp_df = pd.DataFrame( yp, index = df.index, columns=m_idx)
        df_out = pd.concat([df, yp_df], axis = 1)

        df_out.to_csv( fname_out)

        return self

pdlearn.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def cross_val_predict(self, fname_out = None):
        """
        This function is added to save the result of the predicted values. 
        """
        yp = cross_validation.cross_val_predict( self.best_estimator_, self.X, self.y)

        idx = pd.MultiIndex.from_product([['yp'], self.df['y'].columns])
        yp_df = pd.DataFrame( yp, index = self.df.index, columns=idx)
        df_out_org = self.df.merge( yp_df, left_index = True, right_index = True)
        self.df_out = DataFrame( df_out_org[["X", "y", "yp", "param"]])
        # df_out = pd.concat([self.df, yp_df], axis = 1)

        self.df_out.to_csv_excel( '_out', self.fname, fname_out)        

        return yp

pdlearn.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def cross_val_predict(self, fname_out = None):
        """
        This function is added to save the result of the predicted values. 
        """
        yp = cross_validation.cross_val_predict( self.best_estimator_, self.X, self.y)

        idx = pd.MultiIndex.from_product([['yp'], self.df['y'].columns])
        yp_df = pd.DataFrame( yp, index = self.df.index, columns=idx)
        df_out_org = self.df.merge( yp_df, left_index = True, right_index = True)
        self.df_out = DataFrame( df_out_org[["X", "y", "yp", "param"]])
        # df_out = pd.concat([self.df, yp_df], axis = 1)

        self.df_out.to_csv_excel( '_out', self.fname, fname_out)        

        return yp

jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred

jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def cv_Ridge_BIKE( A_list, yV, XX = None, alpha = 0.5, n_folds = 5, n_jobs = -1, grid_std = None):

    clf = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
    ln = A_list[0].shape[0] # ls is the number of molecules.
    kf_n = cross_validation.KFold( ln, n_folds=n_folds, shuffle=True)

    AX_idx = np.array([list(range( ln))]).T
    yV_pred = cross_validation.cross_val_predict( clf, AX_idx, yV, cv = kf_n, n_jobs = n_jobs)

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred

_jgrid_r0.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=shuffle)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred

_jgrid_r0.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def _cv_LOO_r0( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    n_folds = xM.shape[0]

    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred

evaluate_features.py 文件源码项目：wende 作者: h404bi 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def cross_predict(feat, f_name, X=X, y=y):

    if os.name == 'nt':
        n_jobs = 1
    else:
        n_jobs = -1
    # ????
    # clf_1 = MultinomialNB(alpha=5)
    clf_2 = LinearSVC(C=0.02)

    # ???? (CV)
    # This cross-validation object is a merge of StratifiedKFold and ShuffleSplit,
    # which returns stratified randomized folds. The folds are made by preserving
    # the percentage of samples for each class.
    #
    #  Note: like the ShuffleSplit strategy, stratified random splits do not guarantee
    # that all folds will be different, although this is still
    # very likely for sizeable datasets.
    #
    # Pass this cv to cross_val_predict will raise
    # ValueError:cross_val_predict only works for partitions
    #
    # ? cv ?????? fold ? fold ????????
    # cv = cross_validation.StratifiedShuffleSplit(y, test_size=0.2, random_state=42)

    # This cross-validation object is a variation of KFold that returns stratified folds.
    # The folds are made by preserving the percentage of samples for each class.
    cv = cross_validation.StratifiedKFold(y, n_folds=5, random_state=42)

    model = Pipeline([('feat', feat), ('clf', clf_2)])
    t0 = time()
    y_pred = cross_validation.cross_val_predict(model, X=X, y=y, n_jobs=n_jobs, cv=cv)
    t = time() - t0
    print("=" * 20, f_name, "=" * 20)
    print("time cost: {}".format(t))
    # print("y_predict: {}".format(y_pred))
    print()
    print('confusion matrix:\n', confusion_matrix(y, y_pred))
    print()
    print('\t\taccuracy: {}'.format(accuracy_score(y, y_pred)))
    print()
    print("\t\tclassification report")
    print("-" * 52)
    print(classification_report(y, y_pred))


# ??
# ???? (tfidf: baseline feature)

_jgrid_r0.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def cv_Ridge_BIKE( A_list, yV, XX = None, alpha = 0.5, n_folds = 5, n_jobs = -1, grid_std = None):

    clf = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
    ln = A_list[0].shape[0] # ls is the number of molecules.
    kf_n = cross_validation.KFold( ln, n_folds=n_folds, shuffle=True)

    AX_idx = np.array([list(range( ln))]).T
    yV_pred = cross_validation.cross_val_predict( clf, AX_idx, yV, cv = kf_n, n_jobs = n_jobs)

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred

_jgrid_r0.py 文件源码项目：jamespy_py3 作者: jskDr 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def cv_BIKE_Ridge( A_list, yV, alpha = 0.5, XX = None, n_folds = 5, n_jobs = -1, grid_std = None):

    clf = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
    ln = A_list[0].shape[0] # ls is the number of molecules.
    kf_n = cross_validation.KFold( ln, n_folds=n_folds, shuffle=True)

    AX_idx = np.array([list(range( ln))]).T
    yV_pred = cross_validation.cross_val_predict( clf, AX_idx, yV, cv = kf_n, n_jobs = n_jobs)

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred

multi_reg.py 文件源码项目：aq_weather 作者: eliucidate 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def multireg(self,Xtrain,ytrain, Xtest, ytest):    
                self.normalize(Xtrain)
                '''
                # polynomial try
                poly = PolynomialFeatures(degree=2)
                Xtrain = poly.fit_transform(Xtrain)
                Xtest = poly.fit_transform(Xtest)
                '''
                # normal clf fit
                clf = linear_model.LinearRegression()
                clf.fit (Xtrain, ytrain)
                coeffients = clf.coef_
                print "coefficients:", coeffients
                print "intercept:", clf.intercept_

                print "train score", clf.score(Xtrain,ytrain)
                print "test score", clf.score(Xtest,ytest)
                # manual calculate train accuracy
                train_results = clf.predict(Xtrain)
                print "first x:", Xtrain[0]
                print "first result:", train_results[0]
                correct = 0
                for i in range(len(train_results)):
                    if round(train_results[i], 1) == round(ytrain[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytrain)
                print "train accuracy: ", accuracy * 100, "%"
                # cross validation
                score = cross_validation.cross_val_score(clf, Xtrain, ytrain, scoring='mean_squared_error', cv = 5)
                print "cross validation score: ", score

                predict = cross_val_predict(clf, Xtrain, ytrain, cv = 5)
                correct = 0
                for i in range(len(predict)):
                    if round(predict[i], 1) == round(ytrain[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytrain)
                print "cross validation accuracy: ", accuracy * 100, "%"
                # manual calculate test accuracy
                self.normalize(Xtest)
                results = clf.predict(Xtest)
                correct = 0
                for i in range(len(results)):
                    if round(results[i], 1) == round(ytest[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytest)
                print "test accuracy: ", accuracy * 100, "%"

                return coeffients

multi_reg.py 文件源码项目：aq_weather 作者: eliucidate 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def lasso_multireg(self,Xtrain,ytrain, Xtest, ytest):    
                self.normalize(Xtrain)
                clf = linear_model.Lasso(alpha = 0.5)
                clf.fit (Xtrain, ytrain)
                coeffients = clf.coef_
                print "coeffients: ", coeffients
                print "train score", clf.score(Xtrain,ytrain)
                print "test score", clf.score(Xtest,ytest)
                # manual calculate train accuracy
                train_results = clf.predict(Xtrain)
                correct = 0
                for i in range(len(train_results)):
                    if round(train_results[i], 1) == round(ytrain[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytrain)
                print "train accuracy: ", accuracy * 100, "%"
                # cross validation
                predict = cross_val_predict(clf, Xtrain, ytrain, cv = 5)
                correct = 0
                for i in range(len(predict)):
                    if round(predict[i], 1) == round(ytrain[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytrain)
                print "cross validation accuracy: ", accuracy * 100, "%"

                # manual calculate test accuracy
                self.normalize(Xtest)
                results = clf.predict(Xtest)
                correct = 0
                for i in range(len(results)):
                    #print round(results[i], 1), round(ytest[i], 1)
                    if round(results[i], 1) == round(ytest[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytest)
                print "test accuracy: ", accuracy * 100, "%"

                return coeffients

_test.py 文件源码项目：ibex 作者: atavory 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def _generate_cross_val_predict_test(X, y, est, pd_est, must_match):
    def test(self):
        self.assertEqual(
            hasattr(est, 'predict'),
            hasattr(pd_est, 'predict'))
        if not hasattr(est, 'predict'):
            return
        pd_y_hat = pd_cross_val_predict(pd_est, X, y)
        self.assertTrue(isinstance(pd_y_hat, pd.Series))
        self.assertTrue(pd_y_hat.index.equals(X.index))
        if must_match:
            y_hat = cross_val_predict(est, X.as_matrix(), y.values)
            np.testing.assert_allclose(pd_y_hat, y_hat)
    return test

test_cross_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 48 收藏 0 点赞 0 评论 0

def test_cross_val_predict():
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = cval.KFold(len(boston.target))

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv:
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    preds = cval.cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = cval.LeaveOneOut(len(y))
    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cval.cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    preds = cval.cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    def bad_cv():
        for i in range(4):
            yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cval.cross_val_predict, est, X, y, cv=bad_cv())

test_cross_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def test_cross_val_predict_input_types():
    clf = Ridge()
    # Smoke test
    predictions = cval.cross_val_predict(clf, X, y)
    assert_equal(predictions.shape, (10,))

    # test with multioutput y
    predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_equal(predictions.shape, (10, 2))

    predictions = cval.cross_val_predict(clf, X_sparse, y)
    assert_array_equal(predictions.shape, (10,))

    # test with multioutput y
    predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_array_equal(predictions.shape, (10, 2))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    predictions = cval.cross_val_predict(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    predictions = cval.cross_val_predict(clf, X, y.tolist())

    # test with 3d X and
    X_3d = X[:, :, np.newaxis]
    check_3d = lambda x: x.ndim == 3
    clf = CheckingClassifier(check_X=check_3d)
    predictions = cval.cross_val_predict(clf, X_3d, y)
    assert_array_equal(predictions.shape, (10,))

test_cross_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_predict(clf, X_df, y_ser)

test_cross_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def test_cross_val_predict_sparse_prediction():
    # check that cross_val_predict gives same result for sparse and dense input
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          return_indicator=True,
                                          random_state=1)
    X_sparse = csr_matrix(X)
    y_sparse = csr_matrix(y)
    classif = OneVsRestClassifier(SVC(kernel='linear'))
    preds = cval.cross_val_predict(classif, X, y, cv=10)
    preds_sparse = cval.cross_val_predict(classif, X_sparse, y_sparse, cv=10)
    preds_sparse = preds_sparse.toarray()
    assert_array_almost_equal(preds_sparse, preds)

DecisionTrees.py 文件源码项目：a-cadmci 作者: florez87 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def validate(self, features, labels, number_folds):
        """
        Compute a model's performance metrics based on k-fold cross-validation technique.

        Parameters
        ----------
        features: array-like of shape = [number_samples, number_features]
            The validation input samples.

        labels: array-like of shape = [number_samples] or [number_samples, number_outputs]
            The target values (class labels in classification).

        number_folds: int
            The amount of folds for the k-fold cross-validation.
            If 0 compute metrics withput folds.
            If > 0 compute metrics with n folds, n=number_folds.

        Return
        ----------
        accuracy: float
            The accuracy of the model based on it's confusion matrix.

        precision: float
            The precision of the model based on it's confusion matrix.

        sensitivity: float
            The sensitivity of the model based on it's confusion matrix.

        specificity: float
            The specificity of the model based on it's confusion matrix.

        kappa: float
            The Cohen's Kappa of the model based on it's confusion matrix.
        """
        if number_folds == 0:
            predictions = self.model.predict(features)
        else:
            predictions = cross_val_predict(self.model, features, labels, cv = number_folds)
        matrix = confusion_matrix(labels, predictions)
        sum_columns = numpy.sum(matrix, 0)
        sum_rows = numpy.sum(matrix, 1)
        diagonal_sum = numpy.trace(matrix)
        total_sum = numpy.sum(sum_rows)
        accuracy = diagonal_sum / total_sum
        temp_precision = []
        temp_sensitivity = []
        temp_specificity = []
        for i in range(len(matrix)):
            temp_precision.append(matrix[i][i] / sum_columns[i])
            temp_sensitivity.append(matrix[i][i] / sum_rows[i])
            temp_reduced_sum = total_sum - sum_rows[i] - sum_columns[i] + matrix[i][i]
            temp_specificity.append(temp_reduced_sum / (temp_reduced_sum + sum_columns[i] - matrix[i][i]))
        precision = sum(temp_precision * sum_rows) / total_sum
        sensitivity = sum(temp_sensitivity * sum_rows) / total_sum
        specificity = sum(temp_specificity * sum_rows) / total_sum
        kappa_sum = sum(sum_rows * sum_columns)
        kappa_numerator = (total_sum * diagonal_sum) - kappa_sum
        kappa_denominator =  (total_sum * total_sum) - kappa_sum
        kappa = kappa_numerator / kappa_denominator
        return accuracy, precision, sensitivity, specificity, kappa

multi_reg.py 文件源码项目：aq_weather 作者: eliucidate 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def ridge_multireg(self,Xtrain,ytrain, Xtest, ytest):    
                self.normalize(Xtrain)
                '''
                # polynomial try
                poly = PolynomialFeatures(degree=2)
                Xtrain = poly.fit_transform(Xtrain)
                Xtest = poly.fit_transform(Xtest)
                '''
                # normal clf try
                clf = linear_model.Ridge(alpha = 10000)
                clf.fit (Xtrain, ytrain)
                coeffients = clf.coef_
                print "train score", clf.score(Xtrain,ytrain)
                print "test score", clf.score(Xtest,ytest)
                # manual calculate train accuracy
                train_results = clf.predict(Xtrain)
                correct = 0
                for i in range(len(train_results)):
                    if round(train_results[i], 1) == round(ytrain[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytrain)
                print "train accuracy: ", accuracy * 100, "%"
                # cross validation
                score = cross_validation.cross_val_score(clf, Xtrain, ytrain, scoring='mean_squared_error', cv = 5)
                print "cross validation score: ", score
                '''
                predict = cross_val_predict(clf, Xtrain, ytrain, cv = 5)
                correct = 0
                for i in range(len(predict)):
                    if round(predict[i]) == round(ytrain[i]):
                        correct += 1
                accuracy = correct * 1.0 / len(ytrain)
                print "cross validation accuracy: ", accuracy * 100, "%"
                '''
                # manual calculate test accuracy
                self.normalize(Xtest)
                results = clf.predict(Xtest)
                correct = 0
                for i in range(len(results)):
                    if round(results[i], 1) == round(ytest[i], 1):
                        correct += 1
                accuracy = correct * 1.0 / len(ytest)
                print "test accuracy: ", accuracy * 100, "%"

                return coeffients