Example source code for Python's mean_squared_error()
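All of the project snippets below revolve around sklearn.metrics.mean_squared_error. For reference, here is a minimal standalone sketch of the shared call (the arrays are illustrative, not taken from any project):

from sklearn.metrics import mean_squared_error

y_true = [3.0, -0.5, 2.0, 7.0]   # ground-truth targets
y_pred = [2.5, 0.0, 2.0, 8.0]    # model predictions
mse = mean_squared_error(y_true, y_pred)  # mean of squared residuals
print(mse)  # 0.375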

Source: test.py (project: stacked_generalization, author: fukatani)
def test_stacked_regressor(self):
        bclf = LinearRegression()
        clfs = [RandomForestRegressor(n_estimators=50, random_state=1),
                GradientBoostingRegressor(n_estimators=25, random_state=1),
                Ridge(random_state=1)]

        # Friedman1
        X, y = datasets.make_friedman1(n_samples=1200,
                                       random_state=1,
                                       noise=1.0)
        X_train, y_train = X[:200], y[:200]
        X_test, y_test = X[200:], y[200:]

        sr = StackedRegressor(bclf,
                              clfs,
                              n_folds=3,
                              verbose=0,
                              oob_score_flag=True)
        sr.fit(X_train, y_train)
        mse = mean_squared_error(y_test, sr.predict(X_test))
        assert_less(mse, 6.0)
Source: test.py (project: stacked_generalization, author: fukatani)
def test_fwls_regressor(self):
        feature_func = lambda x: np.ones(x.shape)
        bclf = LinearRegression()
        clfs = [RandomForestRegressor(n_estimators=50, random_state=1),
                GradientBoostingRegressor(n_estimators=25, random_state=1),
                Ridge(random_state=1)]

        # Friedman1
        X, y = datasets.make_friedman1(n_samples=1200,
                                       random_state=1,
                                       noise=1.0)
        X_train, y_train = X[:200], y[:200]
        X_test, y_test = X[200:], y[200:]

        sr = FWLSRegressor(bclf,
                           clfs,
                           feature_func,
                           n_folds=3,
                           verbose=0,
                           oob_score_flag=True)
        sr.fit(X_train, y_train)
        mse = mean_squared_error(y_test, sr.predict(X_test))
        assert_less(mse, 6.0)
Source: test.py (project: stacked_generalization, author: fukatani)
def test_regressor(self):
        X, y = datasets.make_friedman1(n_samples=1200,
                                       random_state=1,
                                       noise=1.0)
        X_train, y_train = X[:200], y[:200]
        index = [i for i in range(200)]

        rf = RandomForestRegressor()
        jrf = JoblibedRegressor(rf, "rfr", cache_dir='')
        jrf.fit(X_train, y_train, index)
        prediction = jrf.predict(X_train, index)
        mse = mean_squared_error(y_train, prediction)
        assert_less(mse, 6.0)

        rf = RandomForestRegressor(n_estimators=20)
        jrf = JoblibedRegressor(rf, "rfr", cache_dir='')
        jrf.fit(X_train, y_train, index)
        prediction2 = jrf.predict(X_train, index)
        assert_allclose(prediction, prediction2)
Source: RegressionRandomForest.py (project: AirTicketPredicting, author: junlulocky)
def parameterChoosing(self):
        #Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print mean_squared_error(y_true, y_pred)
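A caveat for current readers: scoring='mean_squared_error' and clf.grid_scores_ only exist in old scikit-learn releases. From scikit-learn 0.18 onward the scorer string is 'neg_mean_squared_error' (scorers follow a greater-is-better convention) and grid_scores_ was replaced by cv_results_. A sketch of the equivalent call on a modern version, reusing tuned_parameters from the snippet above:

# Modern equivalent (scikit-learn >= 0.18); tuned_parameters as defined above.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(RandomForestRegressor(n_estimators=30),
                   tuned_parameters, cv=5,
                   scoring='neg_mean_squared_error')
clf.fit(X_train, y_train)  # X_train/y_train as in the class above
print(clf.best_params_)
print(clf.cv_results_['mean_test_score'])  # negated MSE, higher is better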
Source: model_eval.py (project: healthcareai-py, author: HealthCatalyst)
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (true target values)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Get predictions
    predictions = trained_sklearn_estimator.predict(x_test)

    # Calculate individual metrics
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)

    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}

    return result
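A minimal usage sketch for the helper above; the estimator choice and the synthetic data are illustrative assumptions, not part of healthcareai-py:

import sklearn.metrics as skmetrics  # alias the helper above relies on
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Synthetic regression data, purely for illustration.
X, y = datasets.make_regression(n_samples=200, noise=5.0, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

estimator = LinearRegression().fit(x_train, y_train)
result = calculate_regression_metrics(estimator, x_test, y_test)
print(result['mean_squared_error'], result['mean_absolute_error'])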
Source: two_fold_validation.py (project: TrackToTrip, author: ruipgil)
def score(train_labels, train_features, test_labels, test_features, save_file, use_tree=False):
    if use_tree:
        train_clf = Classifier(tree.DecisionTreeClassifier())
    else:
        train_clf = Classifier()

    print train_clf.clf
    print ''

    t_start = time.clock()
    train_clf.learn(train_features, train_labels)
    t_end = time.clock()
    if save_file:
        train_clf.save_to_file(open(save_file, 'w'))

    p_start = time.clock()
    predicted = train_clf.clf.predict(test_features)
    p_end = time.clock()

    test_labels_t = train_clf.labels.transform(test_labels)
    print classification_report(test_labels_t, predicted, target_names=train_clf.labels.classes_)
    print 'Training time: %fs' % (t_end - t_start)
    print 'Predicting time: %fs' % (p_end - p_start)
    print 'Mean squared error: %f' % mean_squared_error(test_labels_t, predicted)
    return train_clf.score(test_features, test_labels)
Source: oldpred.py (project: OpenAPS, author: medicinexlab)
def _plot_old_pred_data(old_pred_data, show_pred_plot, save_pred_plot, show_clarke_plot, save_clarke_plot, id_str, algorithm_str, minutes_str):
    actual_bg_array = old_pred_data.result_actual_bg_array
    actual_bg_time_array = old_pred_data.result_actual_bg_time_array
    pred_array = old_pred_data.result_pred_array
    pred_time_array = old_pred_data.result_pred_time_array

    #Root mean squared error
    rms = math.sqrt(metrics.mean_squared_error(actual_bg_array, pred_array))
    print "                Root Mean Squared Error: " + str(rms)
    print "                Mean Absolute Error: " + str(metrics.mean_absolute_error(actual_bg_array, pred_array))
    print "                R^2 Coefficient of Determination: " + str(metrics.r2_score(actual_bg_array, pred_array))

    plot, zone = ClarkeErrorGrid.clarke_error_grid(actual_bg_array, pred_array, id_str + " " + algorithm_str + " " + minutes_str)
    print "                Percent A:{}".format(float(zone[0]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4]))
    print "                Percent C, D, E:{}".format(float(zone[2] + zone[3] + zone[4])/ (zone[0] + zone[1] + zone[2] + zone[3] + zone[4]))
    print "                Zones are A:{}, B:{}, C:{}, D:{}, E:{}\n".format(zone[0],zone[1],zone[2],zone[3],zone[4])
    if save_clarke_plot: plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "clarke.png")
    if show_clarke_plot: plot.show()

    plt.clf()
    plt.plot(actual_bg_time_array, actual_bg_array, label="Actual BG", color='black', linestyle='-')
    plt.plot(pred_time_array, pred_array, label="BG Prediction", color='black', linestyle=':')
    plt.title(id_str + " " + algorithm_str + " " + minutes_str + " BG Analysis")
    plt.ylabel("Blood Glucose Level (mg/dl)")
    plt.xlabel("Time (minutes)")
    plt.legend(loc='upper left')

    # SHOW/SAVE PLOT DEPENDING ON THE BOOLEAN PARAMETER
    if save_pred_plot: plt.savefig(id_str + algorithm_str.replace(" ","") + minutes_str + "plot.png")
    if show_pred_plot: plt.show()


#Function to analyze the old OpenAPS data
Source: main.py (project: keras-timeseries-prediction, author: gcarq)
def build_model(look_back: int, batch_size: int=1) -> Sequential:
    """
    The function builds a keras Sequential model
    :param look_back: number of previous time steps as int
    :param batch_size: batch_size as int, defaults to 1
    :return: keras Sequential model
    """
    model = Sequential()
    model.add(LSTM(64,
                   activation='relu',
                   batch_input_shape=(batch_size, look_back, 1),
                   stateful=True,
                   return_sequences=False))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
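A hedged sketch of driving the model above: with batch_input_shape=(1, look_back, 1), inputs must be shaped (samples, look_back, 1), trained with batch_size=1 and shuffle=False, and the LSTM state reset by hand between epochs. The data here is synthetic, for illustration only:

import numpy as np

look_back = 3
X = np.random.rand(100, look_back, 1).astype('float32')  # synthetic windows
y = np.random.rand(100, 1).astype('float32')

model = build_model(look_back, batch_size=1)
for _ in range(5):
    model.fit(X, y, batch_size=1, epochs=1, shuffle=False, verbose=0)
    model.reset_states()  # required between epochs for stateful layers
predictions = model.predict(X, batch_size=1)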
Source: optimizers_test.py (project: stacker, author: bamine)
def setUp(self):
        os.putenv("KMP_DUPLICATE_LIB_OK", "TRUE")
        self.X_class, self.y_class = datasets.make_classification(random_state=42)
        self.X_reg, self.y_reg = datasets.make_regression(random_state=42)
        self.classification_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
        self.regression_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
        self.class_scorer = Scorer("auc_error", lambda y_pred, y_true: 1 - metrics.roc_auc_score(y_pred, y_true))
        self.reg_scorer = Scorer("mse", metrics.mean_squared_error)

        self.classification_task_split = \
            Task("class_split", self.X_class, self.y_class, "classification", test_size=0.1, random_state=42)
        self.regression_task_split = \
            Task("reg_split", self.X_class, self.y_class, "regression", test_size=0.1, random_state=42)

        self.classification_task_cv = \
            Task("class_cv", self.X_reg, self.y_reg, "classification", cv=5, random_state=42)
        self.regression_task_cv = \
            Task("reg_cv", self.X_reg, self.y_reg, "regression", cv=5, random_state=42)
Source: xgboost_sklearnmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def after_test(self):
#         scores_test=[]
#         scores_train=[]
#         scores_test_mse = []
#         scores_train_mse = []
#         for i, y_pred in enumerate(self.clf.staged_predict(self.X_test)):
#             scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
#             scores_test_mse.append(mean_squared_error(self.y_test, y_pred))
#         
#         for i, y_pred in enumerate(self.clf.staged_predict(self.X_train)):
#             scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
#             scores_train_mse.append(mean_squared_error(self.y_train, y_pred))
#         
#         pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')
#         df = pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test})
#         print "Test set MAPE minimum: {}".format(np.array(scores_test).min())
#         df.plot()
#         plt.show()
        return
Source: gradientboostingmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def after_test(self):
        scores_test=[]
        scores_train=[]
        scores_test_mse = []
        scores_train_mse = []
        for i, y_pred in enumerate(self.clf.staged_predict(self.X_test)):
            scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
            scores_test_mse.append(mean_squared_error(self.y_test, y_pred))

        for i, y_pred in enumerate(self.clf.staged_predict(self.X_train)):
            scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
            scores_train_mse.append(mean_squared_error(self.y_train, y_pred))

        pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')
        df = pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test})
        print "Test set MAPE minimum: {}".format(np.array(scores_test).min())
#         df.plot()
#         plt.show()
        return
Source: htfa.py (project: brainiak, author: brainiak)
def _mse_converged(self):
        """Check convergence based on mean squared difference between
            prior and posterior

        Returns
        -------

        converged : boolean
            Whether the parameter estimation converged.

        mse : float
            Mean squared error between prior and posterior.

        """

        prior = self.global_prior_[0:self.prior_size]
        posterior = self.global_posterior_[0:self.prior_size]
        mse = mean_squared_error(prior, posterior,
                                 multioutput='uniform_average')
        if mse > self.threshold:
            return False, mse
        else:
            return True, mse
Source: tfa.py (project: brainiak, author: brainiak)
def _mse_converged(self):
        """Check convergence based on mean squared error

        Returns
        -------

        converged : boolean
            Whether the parameter estimation converged.

        mse : float
            Mean squared error between prior and posterior.

        """

        mse = mean_squared_error(self.local_prior, self.local_posterior_,
                                 multioutput='uniform_average')
        if mse > self.threshold:
            return False, mse
        else:
            return True, mse
Source: stacking.py (project: stacked_generalization, author: fukatani)
def __init__(self,
                 bclf,
                 clfs,
                 n_folds=3,
                 oob_score_flag=False,
                 oob_metrics=mean_squared_error,
                 Kfold=None,
                 verbose=0,
                 save_stage0=False,
                 save_dir=''):
        self.n_folds = n_folds
        self.clfs = clfs
        self.bclf = bclf
        self.all_learner = OrderedDict()
        self.oob_score_flag = oob_score_flag
        self.oob_metrics = oob_metrics
        self.verbose = verbose
        self.stack_by_proba = False
        self.save_stage0 = save_stage0
        self.save_dir = save_dir
        self.MyKfold = Kfold
Source: stacking.py (project: stacked_generalization, author: fukatani)
def __init__(self,
                 bclf,
                 clfs,
                 feature_func,
                 n_folds=3,
                 oob_score_flag=False,
                 oob_metrics=mean_squared_error,
                 Kfold=None,
                 verbose=0,
                 save_stage0=False,
                 save_dir=''):
        super(FWLSRegressor, self).__init__(bclf,
                                            clfs,
                                            n_folds,
                                            oob_score_flag,
                                            oob_metrics,
                                            Kfold,
                                            verbose,
                                            save_stage0,
                                            save_dir)

        self.feature_func = feature_func
Source: lr_mcpc.py (project: rtb-unbiased-learning, author: wnzhang)
def test():
    y = []
    yp = []
    fi = open(sys.argv[1], 'r')
    for line in fi:
        data = ints(line.replace(":1", "").split())
        clk = data[1]
        mp = data[2]
        fsid = 3 # feature start id
        pred = 0.0
        for i in range(fsid, len(data)):
            feat = data[i]
            if feat in featWeight:
                pred += featWeight[feat]
        pred = sigmoid(pred)
        y.append(clk)
        yp.append(pred)
    fi.close()
    auc = roc_auc_score(y, yp)
    rmse = math.sqrt(mean_squared_error(y, yp))
    print str(round) + '\t' + str(auc) + '\t' + str(rmse)
Source: TFPlot.py (project: pythonml, author: nicholastoddsmith)
def PlotLearn(R, A, Y):
    intA = [BinVecToInt(j) for j in A]
    intY = [BinVecToInt(j) for j in Y]
    fig, ax = mpl.subplots(figsize=(20, 10))
    ax.plot(intA, intY, label='Orig')
    l, = ax.plot(intA, intY, label='Pred')
    ax.legend(loc='upper left')
    #Updates the plot in ax as model learns data
    def UpdateF(i):
        R.fit(A, Y)
        YH = R.predict(A)
        S = MSE(Y, YH)
        intYH = [BinVecToInt(j) for j in YH]
        l.set_ydata(intYH)
        ax.set_title('Iteration: ' + str(i * 64) + ' - MSE: ' + str(S))
        return l,

    ani = mpla.FuncAnimation(fig, UpdateF, frames = 2000, interval = 128, repeat = False)
    #ani.save('foo.gif')
    mpl.show()
    return ani
Source: RegressionDecisionTree.py (project: AirTicketPredicting, author: junlulocky)
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                             'max_depth': range(2,1000),
                             }
                            ]


        reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_true, y_pred)
Source: RegressionUniformBlending.py (project: AirTicketPredicting, author: junlulocky)
def predict(self):
        # predict the test data
        y_pred1 = self.net1.predict(self.X_test)
        y_pred1 = y_pred1.reshape((y_pred1.shape[0], 1))

        y_pred2 = self.linRegr.predict(self.X_test)
        y_pred2 = y_pred2.reshape((y_pred2.shape[0], 1))

        y_pred3 = self.knn.predict(self.X_test)
        y_pred3 = y_pred3.reshape((y_pred3.shape[0], 1))

        y_pred4 = self.decisionTree.predict(self.X_test)
        y_pred4 = y_pred4.reshape((y_pred4.shape[0], 1))

        y_pred5 = self.adaReg.predict(self.X_test)
        y_pred5 = y_pred5.reshape((y_pred5.shape[0], 1))

        self.y_pred = (y_pred1+y_pred2+y_pred3+y_pred4+y_pred5)/5

        # print MSE
        mse = mean_squared_error(self.y_pred, self.y_test)
        print "MSE: {}".format(mse)
Source: RegressionKNN.py (project: AirTicketPredicting, author: junlulocky)
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]


        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true)
Source: RegressionRidgeReg.py (project: AirTicketPredicting, author: junlulocky)
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'alpha': np.logspace(-5,5)
                             }
                            ]


        reg = GridSearchCV(linear_model.Ridge(alpha = 0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true)
Source: eval.py (project: Hotpot, author: Liang-Qiu)
def eval_sts(ycat, y, name, quiet=False):
    """ Evaluate given STS regression-classification predictions and print results. """
    if ycat.ndim == 1:
        ypred = ycat
    else:
        ypred = loader.sts_categorical2labels(ycat)
    if y.ndim == 1:
        ygold = y
    else:
        ygold = loader.sts_categorical2labels(y)
    pr = pearsonr(ypred, ygold)[0]
    sr = spearmanr(ypred, ygold)[0]
    e = mse(ypred, ygold)
    if not quiet:
        print('%s Pearson: %f' % (name, pr,))
        print('%s Spearman: %f' % (name, sr,))
        print('%s MSE: %f' % (name, e,))
    return STSRes(pr, sr, e)
Source: nldas_soil_moisture_ml.py (project: elm, author: ContinuumIO)
def r_squared_mse(y_true, y_pred, sample_weight=None, multioutput=None):

    r2 = r2_score(y_true, y_pred,
                  sample_weight=sample_weight, multioutput=multioutput)
    mse = mean_squared_error(y_true, y_pred,
                             sample_weight=sample_weight,
                             multioutput=multioutput)
    bounds_check = np.min(y_pred) > MIN_MOISTURE_BOUND
    bounds_check = bounds_check & (np.max(y_pred) < MAX_MOISTURE_BOUND)
    print('Scoring - std', np.std(y_true), np.std(y_pred))
    print('Scoring - median', np.median(y_true), np.median(y_pred))
    print('Scoring - min', np.min(y_true), np.min(y_pred))
    print('Scoring - max', np.max(y_true), np.max(y_pred))
    print('Scoring - mean', np.mean(y_true), np.mean(y_pred))
    print('Scoring - MSE, R2, bounds', mse, r2, bounds_check)
    return (float(mse),
            float(r2),
            int(bounds_check))
Source: RFfastestLap.py (project: f1_2017, author: aflaisler)
def fastLapModel(xList, labels, names, multiple=0, full_set=0):
    X = numpy.array(xList)
    y = numpy.array(labels)
    featureNames = []
    featureNames = numpy.array(names)
    # take fixed holdout set 30% of data rows
    xTrain, xTest, yTrain, yTest = train_test_split(
        X, y, test_size=0.30, random_state=531)
    # for final model (no CV)
    if full_set:
        xTrain = X
        yTrain = y
    check_set(xTrain, xTest, yTrain, yTest)
    print "Fitting the model to the data set..."
    # train random forest at a range of ensemble sizes in order to see how the
    # mse changes
    mseOos = []
    m = 10 ** multiple
    nTreeList = range(500 * m, 1000 * m, 100 * m)
    # iTrees = 10000
    for iTrees in nTreeList:
        depth = None
        maxFeat = int(np.sqrt(np.shape(xTrain)[1])) + 1  # try tweaking
        RFmd = ensemble.RandomForestRegressor(n_estimators=iTrees, max_depth=depth, max_features=maxFeat,
                                              oob_score=False, random_state=531, n_jobs=-1)
        # RFmd.n_features = 5
        RFmd.fit(xTrain, yTrain)

        # Accumulate mse on test set
        prediction = RFmd.predict(xTest)
        mseOos.append(mean_squared_error(yTest, prediction))
    # plot training and test errors vs number of trees in ensemble
    plot.plot(nTreeList, mseOos)
    plot.xlabel('Number of Trees in Ensemble')
    plot.ylabel('Mean Squared Error')
    #plot.ylim([0.0, 1.1*max(mseOob)])
    plot.show()
    print("MSE")
    print(mseOos[-1])
    return xTrain, xTest, yTrain, yTest, RFmd
Source: test_polynomial_network.py (project: polylearn, author: scikit-learn-contrib)
def check_improve(degree):
    y = _lifted_predict(U[:degree], X)

    common_settings = dict(degree=degree, n_components=n_components,
                           beta=1e-10, tol=0, random_state=0)

    est_5 = PolynomialNetworkRegressor(max_iter=5, **common_settings)
    est_10 = PolynomialNetworkRegressor(max_iter=10, **common_settings)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est_5.fit(X, y)
        est_10.fit(X, y)

    y_pred_5 = est_5.predict(X)
    y_pred_10 = est_10.predict(X)

    assert_less_equal(mean_squared_error(y, y_pred_10),
                      mean_squared_error(y, y_pred_5),
                      msg="More iterations do not improve fit.")
Source: test_polynomial_network.py (project: polylearn, author: scikit-learn-contrib)
def test_random_starts():
    # not as strong a test as the direct case!
    # using training error here, and a higher threshold.
    # We observe the lifted solver reaches rather diff. solutions.
    degree = 3
    noisy_y = _lifted_predict(U[:degree], X)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])

    common_settings = dict(degree=degree, n_components=n_components,
                           beta=0.01, tol=0.01)
    scores = []
    for k in range(5):
        est = PolynomialNetworkRegressor(random_state=k, **common_settings)
        y_pred = est.fit(X, noisy_y).predict(X)
        scores.append(mean_squared_error(noisy_y, y_pred))

    assert_less_equal(np.std(scores), 1e-4)
Source: test_factorization_machine.py (project: polylearn, author: scikit-learn-contrib)
def check_improve(degree):
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    est = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_lower=None, fit_linear=False,
                                        beta=0.0001, max_iter=5, tol=0,
                                        random_state=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y_pred_5 = est.fit(X, y).predict(X)
        est.set_params(max_iter=10)
        y_pred_10 = est.fit(X, y).predict(X)

    assert_less_equal(mean_squared_error(y, y_pred_10),
                      mean_squared_error(y, y_pred_5),
                      msg="More iterations do not improve fit.")
Source: test_factorization_machine.py (project: polylearn, author: scikit-learn-contrib)
def test_random_starts():
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])
    X_train, X_test = X[:10], X[10:]
    y_train, y_test = noisy_y[:10], noisy_y[10:]

    scores = []
    # init_lambdas='ones' is important to reduce variance here
    reg = FactorizationMachineRegressor(degree=2, n_components=n_components,
                                        beta=5, fit_lower=None,
                                        fit_linear=False, max_iter=2000,
                                        init_lambdas='ones', tol=0.001)
    for k in range(10):
        reg.set_params(random_state=k)
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        scores.append(mean_squared_error(y_test, y_pred))

    assert_less_equal(np.std(scores), 0.001)
Source: eval.py (project: DeepST, author: lucktroy)
def rmse(Y_true, Y_pred):
    # https://www.kaggle.com/wiki/RootMeanSquaredError
    from sklearn.metrics import mean_squared_error
    print('shape:', Y_true.shape, Y_pred.shape)
    print("===RMSE===")
    # in
    RMSE = mean_squared_error(Y_true[:, 0].flatten(), Y_pred[:, 0].flatten())**0.5
    print('inflow: ', RMSE)
    # out
    if Y_true.shape[1] > 1:
        RMSE = mean_squared_error(Y_true[:, 1].flatten(), Y_pred[:, 1].flatten())**0.5
        print('outflow: ', RMSE)
    # new
    if Y_true.shape[1] > 2:
        RMSE = mean_squared_error(Y_true[:, 2].flatten(), Y_pred[:, 2].flatten())**0.5
        print('newflow: ', RMSE)
    # end
    if Y_true.shape[1] > 3:
        RMSE = mean_squared_error(Y_true[:, 3].flatten(), Y_pred[:, 3].flatten())**0.5
        print('endflow: ', RMSE)

    RMSE = mean_squared_error(Y_true.flatten(), Y_pred.flatten())**0.5
    print("total rmse: ", RMSE)
    print("===RMSE===")
    return RMSE
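Each RMSE above is computed as mean_squared_error(...) ** 0.5. On scikit-learn 0.22 and later, the same value can be requested directly via squared=False (a version-dependent convenience, shown here as a sketch):

from sklearn.metrics import mean_squared_error

y_true = [1.0, 2.0, 3.0]
y_pred = [1.5, 2.0, 2.5]
rmse = mean_squared_error(y_true, y_pred, squared=False)  # == mse ** 0.5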

