python类RandomForestRegressor()的实例源码

model_comparison.py 文件源码 项目:DSI-personal-reference-kit 作者: teb311 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def cross_validate_best_known():
    '''
        Load and clean the tractor data, then cross-validate the three models
        trained here: a RandomForest, a GradientBoost, and an AdaBoost backed
        by a DecisionTree. The scores are reported by validate.cross_v_scores.

        The hyper-parameters below are the "best" found so far with a grid
        search.
    '''
    features = cln.clean_all(pd.read_csv('data/train.csv'))
    # pop() removes the target column in place, so `features` no longer
    # contains 'SalePrice' once this line has run.
    target = features.pop('SalePrice')

    forest = RandomForestRegressor(
        max_features=2, min_samples_split=4, n_estimators=50,
        min_samples_leaf=2)
    boosted = GradientBoostingRegressor(
        loss='quantile', learning_rate=0.0001, n_estimators=50,
        max_features='log2', min_samples_split=2, max_depth=1)
    # Weak learner handed to AdaBoost as its first positional argument.
    shallow_tree = DecisionTreeRegressor(
        max_features='sqrt', splitter='random', min_samples_split=4,
        max_depth=3)
    ada = AdaBoostRegressor(
        shallow_tree, learning_rate=0.1, loss='square', n_estimators=1000)

    validate.cross_v_scores([forest, boosted, ada], features, target)
    # Scores recorded from an earlier run:
    # RandomForestRegressor -- RMLSE: -0.596797712098, R2: 0.0272065373946
    # GradientBoostingRegressor -- RMLSE: -0.996134592541, R2: -2.37202164829
    # AdaBoostRegressor -- RMLSE: -0.706385708459, R2: -0.103966980393
test.py 文件源码 项目:stacked_generalization 作者: fukatani 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_stacked_regressor(self):
    """Fit a StackedRegressor on 200 Friedman #1 samples; test MSE must be < 6.0.

    A LinearRegression is passed as the first argument and a random forest,
    a gradient-boosting model and a ridge regression as the second
    (presumably the blender and the base learners -- confirm against
    StackedRegressor).
    """
    n_train = 200

    # Friedman1
    features, target = datasets.make_friedman1(n_samples=1200,
                                               random_state=1,
                                               noise=1.0)
    X_train, X_test = features[:n_train], features[n_train:]
    y_train, y_test = target[:n_train], target[n_train:]

    base_models = [
        RandomForestRegressor(n_estimators=50, random_state=1),
        GradientBoostingRegressor(n_estimators=25, random_state=1),
        Ridge(random_state=1),
    ]
    stacked = StackedRegressor(LinearRegression(),
                               base_models,
                               n_folds=3,
                               verbose=0,
                               oob_score_flag=True)
    stacked.fit(X_train, y_train)

    test_error = mean_squared_error(y_test, stacked.predict(X_test))
    assert_less(test_error, 6.0)
test.py 文件源码 项目:stacked_generalization 作者: fukatani 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_fwls_regressor(self):
    """Fit an FWLSRegressor on 200 Friedman #1 samples; test MSE must be < 6.0.

    The feature function handed to the regressor returns an all-ones array
    with the same shape as its input.
    """
    def feature_func(x):
        return np.ones(x.shape)

    n_train = 200

    # Friedman1
    features, target = datasets.make_friedman1(n_samples=1200,
                                               random_state=1,
                                               noise=1.0)
    X_train, X_test = features[:n_train], features[n_train:]
    y_train, y_test = target[:n_train], target[n_train:]

    base_models = [
        RandomForestRegressor(n_estimators=50, random_state=1),
        GradientBoostingRegressor(n_estimators=25, random_state=1),
        Ridge(random_state=1),
    ]
    fwls = FWLSRegressor(LinearRegression(),
                         base_models,
                         feature_func,
                         n_folds=3,
                         verbose=0,
                         oob_score_flag=True)
    fwls.fit(X_train, y_train)

    test_error = mean_squared_error(y_test, fwls.predict(X_test))
    assert_less(test_error, 6.0)
utils_feature_selection.py 文件源码 项目:auto_ml 作者: ClimbsRocks 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    """Return the feature-selection object registered under ``model_name``.

    ``type_of_estimator`` must be 'classifier' or 'regressor'; an unknown
    type or model name raises KeyError. Every selector for both estimator
    types is instantiated on each call, and only the requested one is
    returned. 'KeepAll' maps to the plain string 'KeepAll', not to an
    estimator.
    """
    classifier_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='20*mean'),
        'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'KeepAll': 'KeepAll',
    }
    regressor_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='0.7*mean'),
        'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'KeepAll': 'KeepAll',
    }
    selectors_by_type = {
        'classifier': classifier_selectors,
        'regressor': regressor_selectors,
    }

    return selectors_by_type[type_of_estimator][model_name]
RandomForest.py 文件源码 项目:HousePricePredictionKaggle 作者: Nuwantha 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def model_random_forecast(Xtrain,Xtest,ytrain):
    """Grid-search a random forest with 10-fold CV and predict ``Xtest``.

    Returns a tuple ``(y_pred, score)`` where ``score`` is the negated
    ``best_score_`` of the search (the scorer is the module-level ``RMSE``
    object; presumably it reports negated errors -- confirm against its
    definition).
    """
    # Only the forest size is searched at the moment. A wider grid tried before:
    # 'n_estimators': [1000], 'max_features': [10,15,20,25], 'max_depth':[20,20,25,25,]}
    search = GridSearchCV(
        estimator=RandomForestRegressor(n_jobs=1, random_state=0),
        param_grid={'n_estimators': [1000]},
        n_jobs=1,
        cv=10,
        scoring=RMSE,
    )
    search.fit(Xtrain, ytrain)

    best_cv_score = -search.best_score_
    print('Random forecast regression...')
    print('Best Params:')
    print(search.best_params_)
    print('Best CV Score:')
    print(best_cv_score)

    return search.predict(Xtest), best_cv_score
sk_feature_process.py 文件源码 项目:python_utils 作者: Jayhello 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def rfr_feature_select():
    """Rank Boston-housing features by single-feature random-forest R^2.

    For every column of the dataset a small random forest is cross-validated
    on that column alone (3 shuffle splits, 30% held out). The mean R^2,
    rounded to 3 decimals, is paired with the feature name and the pairs are
    printed in descending order.

    NOTE: ``load_boston`` was removed in scikit-learn 1.2, so this function
    needs an older scikit-learn release.
    """
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestRegressor
    try:
        # scikit-learn >= 0.18: splitters no longer take the sample count.
        from sklearn.model_selection import cross_val_score, ShuffleSplit
        modern_api = True
    except ImportError:
        # Legacy module, removed in scikit-learn 0.20.
        from sklearn.cross_validation import cross_val_score, ShuffleSplit
        modern_api = False

    boston = load_boston()
    X = boston["data"]
    Y = boston["target"]
    names = boston["feature_names"]

    rf = RandomForestRegressor(n_estimators=20, max_depth=4)
    scores = []
    for i in range(X.shape[1]):
        if modern_api:
            splitter = ShuffleSplit(n_splits=3, test_size=.3)
        else:
            splitter = ShuffleSplit(len(X), 3, .3)
        # X[:, i:i + 1] keeps the column 2-D, as the estimator requires.
        score = cross_val_score(rf, X[:, i:i + 1],
                                Y, scoring="r2", cv=splitter)
        scores.append((round(np.mean(score), 3), names[i]))

    # Single-argument call form prints identically on Python 2 and 3.
    print(sorted(scores, reverse=True))
utils_feature_selection.py 文件源码 项目:auto_ml 作者: doordash 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    """Return the feature-selection object registered under ``model_name``.

    ``type_of_estimator`` must be 'classifier' or 'regressor'; an unknown
    type or model name raises KeyError. Every selector for both estimator
    types is instantiated on each call, and only the requested one is
    returned. 'KeepAll' maps to the plain string 'KeepAll', not to an
    estimator.
    """
    classifier_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='20*mean'),
        'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'RandomizedSparse': RandomizedLogisticRegression(),
        'KeepAll': 'KeepAll',
    }
    regressor_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='0.7*mean'),
        'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'RandomizedSparse': RandomizedLasso(),
        'KeepAll': 'KeepAll',
    }
    selectors_by_type = {
        'classifier': classifier_selectors,
        'regressor': regressor_selectors,
    }

    return selectors_by_type[type_of_estimator][model_name]
Prediction.py 文件源码 项目:XTREE 作者: ai-se 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def rforest2(train, test, tunings=None, smoteit=True, duplicate=True):
  "RF "
  # The short docstring above is kept verbatim in case callers display it.
  #
  # Fit a random forest *regressor* on `train` and return its predictions
  # for `test`.
  #
  # train, test : datasets accepted by formatData().
  # tunings     : optional sequence (n_estimators, max_features in percent,
  #               min_samples_leaf, min_samples_split); when falsy, a
  #               100-tree forest with random_state=1 is used.
  # smoteit     : when true, `train` is resampled with SMOTE first.
  # duplicate   : forwarded to SMOTE as `resample`.
  if smoteit:
    train = SMOTE(train, atleast=50, atmost=101, resample=duplicate)
  if not tunings:
    clf = RandomForestRegressor(n_estimators=100, random_state=1)
  else:
    clf = RandomForestRegressor(n_estimators=int(tunings[0]),
                                # 100.0 forces true division: with an integer
                                # percentage Python 2 would truncate to 0.
                                max_features=tunings[1] / 100.0,
                                min_samples_leaf=int(tunings[2]),
                                min_samples_split=int(tunings[3])
                                )
  train_DF = formatData(train)
  test_DF = formatData(test)
  # All columns except the last two are features; the second-to-last column
  # is the regression target.
  features = train_DF.columns[:-2]
  klass = train_DF[train_DF.columns[-2]]
  clf.fit(train_DF[features], klass)
  preds = clf.predict(test_DF[test_DF.columns[:-2]])
  return preds
test.py 文件源码 项目:stacked_generalization 作者: fukatani 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def test_regressor(self):
    """JoblibedRegressor fits within MSE 6.0 and repeats its predictions.

    A second wrapper built around a different forest, but with the same name
    and cache directory, must return the same predictions as the first
    (presumably because they are served from the cache -- confirm against
    JoblibedRegressor).
    """
    n_train = 200
    features, target = datasets.make_friedman1(n_samples=1200,
                                               random_state=1,
                                               noise=1.0)
    X_train, y_train = features[:n_train], target[:n_train]
    index = list(range(n_train))

    first = JoblibedRegressor(RandomForestRegressor(), "rfr", cache_dir='')
    first.fit(X_train, y_train, index)
    prediction = first.predict(X_train, index)
    assert_less(mean_squared_error(y_train, prediction), 6.0)

    second = JoblibedRegressor(RandomForestRegressor(n_estimators=20),
                               "rfr", cache_dir='')
    second.fit(X_train, y_train, index)
    assert_allclose(prediction, second.predict(X_train, index))
run_model_fit.py 文件源码 项目:time_series_modeling 作者: rheineke 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def unscaled_pipelines():
    """Return one single-step pipeline per enabled model.

    Only the decision tree is enabled. The two keyword dictionaries below
    are consumed solely by the disabled model lines.
    """
    # Random forest parameters
    random_forest_kwargs = dict(
        n_estimators=10,
        criterion='mse',
        random_state=_RANDOM_STATE,
        n_jobs=cpu_count(),
        verbose=True,
    )
    # Gradient boosting parameters
    gradient_boost_kwargs = dict(
        random_state=_RANDOM_STATE,
        verbose=1,
    )
    enabled_models = [
        DecisionTreeRegressor(max_depth=3, random_state=_RANDOM_STATE),
        # RandomForestRegressor(**random_forest_kwargs),
        # GradientBoostingRegressor(**gradient_boost_kwargs),
    ]
    # Each pipeline consists of the bare model, with no scaling step.
    return [make_pipeline(model) for model in enabled_models]
RegressionRandomForest.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
        """Grid-search random-forest hyper-parameters and report the results.

        Runs a 5-fold GridSearchCV over max_depth (20-59), n_estimators
        (10-39) and max_features on ``self.X_train`` / ``self.y_train``, then
        prints the best parameters, every grid score and the MSE on
        ``self.X_test`` / ``self.y_test``.

        NOTE(review): ``scoring='mean_squared_error'`` and ``grid_scores_``
        are left as written; they look like legacy scikit-learn spellings --
        confirm against the installed version.
        """
        # Set the parameters by cross-validation. Explicit lists behave the
        # same on Python 2 and 3.
        tuned_parameters = [{'max_depth': list(range(20,60)),
                             'n_estimators': list(range(10,40)),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
        clf.fit(self.X_train, self.y_train.ravel())

        # Single-argument print() calls give identical output on Python 2
        # and Python 3.
        print("Best parameters set found on development set:\n")
        print(clf.best_params_)

        print("Grid scores on development set:\n")
        for params, mean_score, scores in clf.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

        print("MSE for test data set:\n")
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print(mean_squared_error(y_true, y_pred))
resnet_regressor.py 文件源码 项目:Brain_Tumor_Segmentation 作者: KarthikRevanuru 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def train_xgboost():
    """Train an XGBoost regressor that predicts survival.

    For each entry of the module-level ``folder_names_train``, the saved
    arrays of the four modalities (flair, t1, t1ce, t2) are averaged over
    axis 0 and concatenated column-wise, with the subject's age appended as
    the last column. The target is the 'Survival' column of
    ``survival_data.csv``.

    Subjects whose 'Survival' or 'Age' cannot be looked up are reported and
    dropped from both the features and the target. Previously such a subject
    added a 0 to the target without adding an age, and could add two target
    entries, so the arrays became misaligned and the final concatenation
    failed.

    Returns the fitted ``xgb.XGBRegressor``.
    """
    df = pd.read_csv('survival_data.csv', index_col=0, encoding = 'UTF-7')

    def modality_means(suffix):
        # One row per subject: the stored array averaged over axis 0.
        return np.array([
            np.mean(np.load('training/%s_%s.nii.gz.npy' % (str(name), suffix)),
                    axis=0)
            for name in folder_names_train])

    # Column order matches the original: flair, t1, t1ce, t2.
    x = np.concatenate(
        [modality_means(suffix) for suffix in ('flair', 't1', 't1ce', 't2')],
        axis=1)

    survival = []
    ages = []
    kept_rows = []
    missing = 0
    for row, name in enumerate(folder_names_train):
        key = str(name)
        try:
            # Look up both values before recording either, so a partial
            # failure cannot leave the lists with different lengths.
            subject_survival = df.at[key, 'Survival']
            subject_age = df.at[key, 'Age']
        except (KeyError, ValueError) as e:
            missing += 1
            print (missing, str(e), "Label Not found, deleting entry")
            continue
        survival.append(subject_survival)
        ages.append(subject_age)
        kept_rows.append(row)

    y = np.array(survival, dtype=float)
    z = np.array(ages, dtype=float).reshape(-1, 1)

    # Keep only the rows that have labels, then append age as the last column.
    x = np.concatenate((x[kept_rows], z), axis=1)

    #clf=linear_model.LogisticRegression(C=1e5)
    #clf = RandomForestRegressor()
    clf = xgb.XGBRegressor()
    clf.fit(x,y)
    return clf
RandomForest.py 文件源码 项目:pyGPGO 作者: hawk31 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def fit(self, X, y):
    """
    Train a Random Forest regressor on inputs `X` and targets `y`.

    The training data, its number of rows and the fitted model are stored
    on the instance as `X`, `y`, `n` and `model`.

    Parameters
    ----------
    X : array-like
        Input values.
    y: array-like
        Target values.
    """
    self.X, self.y = X, y
    self.n = self.X.shape[0]
    # The estimator is configured from the keyword arguments in self.params.
    forest = RandomForestRegressor(**self.params)
    self.model = forest
    forest.fit(X, y)
test_io_types.py 文件源码 项目:coremltools 作者: apple 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def test_random_forest_regressor(self):
    """Compare scikit-learn and CoreML predictions for every numeric dtype.

    For each key of ``self.number_data_type`` the data is cast to that
    dtype, a random forest is set up via ``self._sklearn_setup`` and the
    resulting spec is checked. The CoreML prediction for the first sample
    must then match the scikit-learn one in type and, approximately, in
    value. A RuntimeError during the comparison is reported as an
    unsupported dtype.
    """
    for dtype in self.number_data_type:
        data = self.scikit_data['data'].astype(dtype)
        target = self.scikit_data['target'].astype(dtype)
        scikit_model, spec = self._sklearn_setup(
            RandomForestRegressor(random_state=1), dtype, data, target)
        # A single sample, reshaped to the 2-D form predict() expects.
        test_data = data[0].reshape(1, -1)
        self._check_tree_model(spec, 'multiArrayType', 'doubleType', 1)
        coreml_model = create_model(spec)
        try:
            expected = scikit_model.predict(test_data)[0]
            actual = coreml_model.predict({'data': test_data})['target']
            self.assertEqual(expected.dtype, type(actual))
            self.assertAlmostEqual(
                expected,
                actual,
                msg="{} != {} for Dtype: {}".format(expected, actual, dtype))
        except RuntimeError:
            print("{} not supported. ".format(dtype))
test_random_forest_regression_numeric.py 文件源码 项目:coremltools 作者: apple 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def _train_convert_evaluate(self, **scikit_params):
    """
    Fit a scikit-learn random forest, convert it, and evaluate the converted
    spec with CoreML.

    ``scikit_params`` are forwarded to RandomForestRegressor (random_state is
    fixed to 1). Returns whatever ``evaluate_regressor`` reports.
    """
    forest = RandomForestRegressor(random_state = 1, **scikit_params)
    forest.fit(self.X, self.target)

    # Convert the fitted model
    spec = skl_converter.convert(forest, self.feature_names, self.output_name)

    # Reference predictions from scikit-learn, stored next to the inputs
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame['prediction'] = forest.predict(self.X)

    # Evaluate the converted spec against those predictions
    return evaluate_regressor(spec, frame, verbose = False)
friedman_scores.py 文件源码 项目:mlens 作者: flennerhag 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def build_ensemble(**kwargs):
    """Build a SuperLearner with three preprocessing cases and a GBM meta layer.

    ``kwargs`` are forwarded to the SuperLearner constructor.
    """
    standard, min_max, raw = ('Standard Scaling',
                              'Min Max Scaling',
                              'No Preprocessing')

    # Preprocessing steps per case; the estimators below are keyed by the
    # same case names.
    preprocessing = {
        standard: [StandardScaler()],
        min_max: [MinMaxScaler()],
        raw: [],
    }
    estimators = {
        standard: [ElasticNet(), Lasso(), KNeighborsRegressor()],
        min_max: [SVR()],
        raw: [RandomForestRegressor(random_state=SEED),
              GradientBoostingRegressor()],
    }

    ensemble = SuperLearner(**kwargs)
    ensemble.add(estimators, preprocessing)
    # Added with meta=True: the meta learner.
    ensemble.add(GradientBoostingRegressor(), meta=True)

    return ensemble
classifier.py 文件源码 项目:Semantic-Texual-Similarity-Toolkits 作者: rgtjf 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def train_model(self, train_file_path, model_path):
        """Train a random forest on a feature file and pickle model and scaler.

        The features loaded with ``self.load_file`` are scaled with a
        MaxAbsScaler, densified and used to fit a RandomForestRegressor with
        ``self.n_estimators`` trees. The model is pickled to ``model_path``
        and the scaler to the same path with '.pkl' replaced by '.scaler.pkl'.

        NOTE(review): if ``model_path`` does not contain '.pkl', both paths
        are identical and the scaler overwrites the model -- confirm callers
        always pass a '.pkl' path.

        Returns the fitted regressor.
        """
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)
        clf = RandomForestRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        # Context managers make sure both files are flushed and closed.
        with open(model_path, 'wb') as model_file:
            pickle.dump(clf, model_file)

        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        with open(scaler_path, 'wb') as scaler_file:
            pickle.dump(min_max_scaler, scaler_file)
        return clf
track.py 文件源码 项目:libskeletal 作者: bobbybee 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def trainModel(featureCount, imageCount, save):
    """Train a single-tree random forest and optionally save it.

    ``generateFeatures(featureCount)`` provides the features, and ``train``
    is called once per image index in ``range(imageCount)``. The forest is
    then fitted on the module-level ``X`` and ``Y`` (presumably filled by
    ``train`` -- confirm).

    Returns the tuple ``(clf, features)``; when ``save`` is true the same
    tuple is dumped to "model.pkl" with joblib.
    """
    clf = RandomForestRegressor(n_estimators=1, n_jobs=-1)

    features = generateFeatures(featureCount)

    for image in range(imageCount):
        # Single-argument print() gives identical output on Python 2 and 3.
        print("Image " + str(image))
        train(clf, features, image)

    clf = clf.fit(X, Y)
    model = (clf, features)

    if save:
        joblib.dump(model, "model.pkl")

    return model
EstimateTheModel.py 文件源码 项目:DataAnalysis 作者: IMYin 项目源码 文件源码 阅读 55 收藏 0 点赞 0 评论 0
def set_missing_ages(df):
    """Fill missing 'Age' values with random-forest predictions.

    A RandomForestRegressor is fitted on the rows whose age is known, using
    'Fare', 'Parch', 'SibSp' and 'Pclass' as features, and its predictions
    replace the missing ages. ``df`` is modified in place.

    Returns the tuple ``(df, rfr)``: the updated frame and the fitted
    regressor.
    """
    age_df = df[['Age', 'Fare', 'Parch', 'SibSp', 'Pclass']]
    missing_age = df.Age.isnull()
    # .values replaces as_matrix(), which was removed in pandas 1.0.
    known_age = age_df[age_df.Age.notnull()].values
    unknown_age = age_df[age_df.Age.isnull()].values

    # Column 0 is the target (Age); the remaining columns are the features.
    y = known_age[:, 0]
    X = known_age[:, 1:]

    # fit by RandomForestRegressor
    rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X, y)

    # Predicting on an empty array raises, so skip it when nothing is missing.
    if len(unknown_age):
        # predict the unknown ages and write them back
        predictedAges = rfr.predict(unknown_age[:, 1:])
        df.loc[missing_age, 'Age'] = predictedAges

    return df, rfr
kaggle_titanic.py 文件源码 项目:DataAnalysis 作者: IMYin 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def set_missing_ages(df):
    """Fill missing 'Age' values with random-forest predictions.

    A RandomForestRegressor is fitted on the rows whose age is known, using
    'Fare', 'Parch', 'SibSp' and 'Pclass' as features, and its predictions
    replace the missing ages. ``df`` is modified in place.

    Returns the tuple ``(df, rfr)``: the updated frame and the fitted
    regressor.
    """
    age_df = df[['Age', 'Fare', 'Parch', 'SibSp', 'Pclass']]
    missing_age = df.Age.isnull()
    # .values replaces as_matrix(), which was removed in pandas 1.0.
    known_age = age_df[age_df.Age.notnull()].values
    unknown_age = age_df[age_df.Age.isnull()].values

    # Column 0 is the target (Age); the remaining columns are the features.
    y = known_age[:, 0]
    X = known_age[:, 1:]

    # fit by RandomForestRegressor
    rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X, y)

    # Predicting on an empty array raises, so skip it when nothing is missing.
    if len(unknown_age):
        # predict the unknown ages and write them back
        predictedAges = rfr.predict(unknown_age[:, 1:])
        df.loc[missing_age, 'Age'] = predictedAges

    return df, rfr


# processing the column : Cabin
test_gbrt.py 文件源码 项目:scikit-optimize 作者: scikit-optimize 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def test_gbrt_base_estimator():
    """Check base-estimator validation and the quantile estimates.

    A non-GradientBoostingRegressor base and a GradientBoostingRegressor
    without quantile loss must both raise ValueError on fit. With a valid
    base, the mean estimated quantiles on constant inputs with standard
    normal targets must match the normal quantiles to 2 decimals.
    """
    n_samples = 10000
    rng = np.random.RandomState(1)
    features = np.ones((n_samples, 1))
    targets = rng.normal(size=n_samples)

    wrong_type = GradientBoostingQuantileRegressor(
        base_estimator=RandomForestRegressor())
    assert_raise_message(ValueError, 'type GradientBoostingRegressor',
                         wrong_type.fit, features, targets)

    wrong_loss = GradientBoostingQuantileRegressor(
        base_estimator=GradientBoostingRegressor())
    assert_raise_message(ValueError, 'quantile loss',
                         wrong_loss.fit, features, targets)

    valid_base = GradientBoostingRegressor(loss='quantile', n_estimators=20)
    rgr = GradientBoostingQuantileRegressor(base_estimator=valid_base)
    rgr.fit(features, targets)

    estimates = rgr.predict(features, return_quantiles=True)
    assert_almost_equal(stats.norm.ppf(rgr.quantiles),
                        np.mean(estimates, axis=0),
                        decimal=2)
predict_2017_06_16_3.py 文件源码 项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def rf1(train2, y, test2, v, z):
    """Cross-validated random forest; results go in the 'rf1' column of v and z.

    The column name is taken from this function's own name. For each of 300
    stratified folds a forest is fitted on the training part, its
    predictions for the validation part are added to ``v``, and the log1p of
    its predictions for ``test2`` is added to ``z``. ``z`` is finally divided
    by the number of folds. Fold scores and the overall validation loss are
    printed.

    NOTE(review): ``z`` ends up holding the mean of log1p(predictions) and is
    never mapped back here -- confirm that callers expect that.
    """
    cname = sys._getframe().f_code.co_name
    v[cname] = 0
    z[cname] = 0
    fold_count = 300
    fold_scores = []
    folds = model_selection.StratifiedKFold(n_splits=fold_count, shuffle=True)
    for step, (itrain, ival) in enumerate(folds.split(train2, y), 1):
        print('step %d of %d'%(step, folds.n_splits), now())
        forest = ensemble.RandomForestRegressor(n_estimators=1000,
                                                max_depth=3,
                                                random_state=13)
        forest.fit(train2[itrain], y[itrain])

        fold_pred = forest.predict(train2[ival])
        v.loc[ival, cname] += fold_pred
        fold_score = metrics.log_loss(y[ival], fold_pred)
        z[cname]  += np.log1p(forest.predict(test2))
        print(cname, 'step %d: score'%(step), fold_score, now())
        fold_scores.append(fold_score)

    print('validation loss: ', metrics.log_loss(y, v[cname]))
    cv = np.array(fold_scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= fold_count
10.4 RF_regression.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def test_RandomForestRegressor_num(*data):
    '''
    Plot training and testing scores for different values of n_estimators.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    estimator_counts=np.arange(1,100,step=2)
    figure=plt.figure()
    axes=figure.add_subplot(1,1,1)
    # One (training score, testing score) pair per forest size.
    score_pairs=[]
    for count in estimator_counts:
        forest=ensemble.RandomForestRegressor(n_estimators=count)
        forest.fit(X_train,y_train)
        score_pairs.append((forest.score(X_train,y_train),
                            forest.score(X_test,y_test)))
    training_scores=[pair[0] for pair in score_pairs]
    testing_scores=[pair[1] for pair in score_pairs]
    axes.plot(estimator_counts,training_scores,label="Training Score")
    axes.plot(estimator_counts,testing_scores,label="Testing Score")
    axes.set_xlabel("estimator num")
    axes.set_ylabel("score")
    axes.legend(loc="lower right")
    axes.set_ylim(-1,1)
    plt.suptitle("RandomForestRegressor")
    plt.show()
10.4 RF_regression.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def test_RandomForestRegressor_max_depth(*data):
    '''
    Plot training and testing scores for different values of max_depth.

    :param data:  train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    depths=range(1,20)
    figure=plt.figure()
    axes=figure.add_subplot(1,1,1)
    # One (training score, testing score) pair per depth limit.
    score_pairs=[]
    for depth in depths:
        forest=ensemble.RandomForestRegressor(max_depth=depth)
        forest.fit(X_train,y_train)
        score_pairs.append((forest.score(X_train,y_train),
                            forest.score(X_test,y_test)))
    training_scores=[pair[0] for pair in score_pairs]
    testing_scores=[pair[1] for pair in score_pairs]
    axes.plot(depths,training_scores,label="Training Score")
    axes.plot(depths,testing_scores,label="Testing Score")
    axes.set_xlabel("max_depth")
    axes.set_ylabel("score")
    axes.legend(loc="lower right")
    axes.set_ylim(0,1.05)
    plt.suptitle("RandomForestRegressor")
    plt.show()
10.4 RF_regression.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def test_RandomForestRegressor_max_features(*data):
    '''
    Plot training and testing scores for different values of max_features.

    :param data:  train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    # Fractions of the features to consider, from 0.01 to 1.0.
    feature_fractions=np.linspace(0.01,1.0)
    figure=plt.figure()
    axes=figure.add_subplot(1,1,1)
    # One (training score, testing score) pair per fraction.
    score_pairs=[]
    for fraction in feature_fractions:
        forest=ensemble.RandomForestRegressor(max_features=fraction)
        forest.fit(X_train,y_train)
        score_pairs.append((forest.score(X_train,y_train),
                            forest.score(X_test,y_test)))
    training_scores=[pair[0] for pair in score_pairs]
    testing_scores=[pair[1] for pair in score_pairs]
    axes.plot(feature_fractions,training_scores,label="Training Score")
    axes.plot(feature_fractions,testing_scores,label="Testing Score")
    axes.set_xlabel("max_feature")
    axes.set_ylabel("score")
    axes.legend(loc="lower right")
    axes.set_ylim(0,1.05)
    plt.suptitle("RandomForestRegressor")
    plt.show()
rice_run.py 文件源码 项目:Black-Swan 作者: 12190143 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def rf(train_sample, validation_sample, features, seed):
    """Fit a random forest on log-transformed volume and predict validation volume.

    The target is log1p(volume) expressed in base ``log_base`` (e here).
    Predictions are mapped back with ``log_base ** prediction - 1``, passed
    to ``print_mape`` together with the true validation volume, and returned.
    """
    log_base = np.e
    forest = RandomForestRegressor(n_estimators=500,
                                   criterion='mse',
                                   max_features=4,
                                   max_depth=None,
                                   bootstrap=True,
                                   min_samples_split=4,
                                   min_samples_leaf=1,
                                   min_weight_fraction_leaf=0,
                                   max_leaf_nodes=None,
                                   random_state=seed
                                   )
    # Change of base: log1p is natural, so divide by ln(log_base).
    log_volume = np.log1p(train_sample['volume']) / np.log(log_base)
    forest.fit(train_sample[features], log_volume)

    log_prediction = forest.predict(validation_sample[features])
    rf_prob = np.power(log_base, log_prediction) - 1
    print_mape(validation_sample['volume'], rf_prob, 'RF')
    return rf_prob
test_validation.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_check_consistent_length():
    """check_consistent_length accepts equal lengths and rejects the rest.

    Inputs of equal length (lists, tuples, arrays, sparse matrices) must
    pass; mismatched lengths raise ValueError; objects without a usable
    length (ints, plain objects, 0-d arrays, estimators) raise TypeError.
    """
    check_consistent_length([1], [2], [3], [4], [5])
    check_consistent_length([[1, 2], [[1, 2]]], [1, 2], ['a', 'b'])
    check_consistent_length([1], (2,), np.array([3]), sp.csr_matrix((1, 2)))
    assert_raises_regexp(ValueError, 'inconsistent numbers of samples',
                         check_consistent_length, [1, 2], [1])
    # Raw strings: '\w' in a normal literal is an invalid escape sequence and
    # triggers a warning on recent Python versions. The patterns themselves
    # are unchanged.
    assert_raises_regexp(TypeError, r"got <\w+ 'int'>",
                         check_consistent_length, [1, 2], 1)
    assert_raises_regexp(TypeError, r"got <\w+ 'object'>",
                         check_consistent_length, [1, 2], object())

    assert_raises(TypeError, check_consistent_length, [1, 2], np.array(1))
    # Despite ensembles having __len__ they must raise TypeError
    assert_raises_regexp(TypeError, 'estimator', check_consistent_length,
                         [1, 2], RandomForestRegressor())
    # XXX: We should have a test with a string, but what is correct behaviour?
rf.py 文件源码 项目:hyperband 作者: zygmuntz 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def try_params( n_iterations, params ):
    """Train and evaluate a random forest sized by ``n_iterations``.

    The number of trees is ``n_iterations * trees_per_iteration`` rounded to
    the nearest integer. ``params`` are forwarded to the estimator as
    keyword arguments. Returns the result of
    ``train_and_eval_sklearn_regressor`` on the module-level ``data``.
    """
    n_estimators = int( round( n_iterations * trees_per_iteration ))
    # Formatted into one string so the output is the same on Python 2 and 3.
    print( "n_estimators: {}".format( n_estimators ))
    pprint( params )

    clf = RF( n_estimators = n_estimators, verbose = 0, n_jobs = -1, **params )

    return train_and_eval_sklearn_regressor( clf, data )
optimizers.py 文件源码 项目:stacker 作者: bamine 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def __init__(self, task: Task, scorer: Scorer, opt_logger: OptimizationLogger=VoidLogger(None)):
    """Pick the random-forest model and search space that match the task.

    A task whose ``task`` attribute is "classification" gets a
    RandomForestClassifier and the classification space; any other task
    gets a RandomForestRegressor and the regression space. Everything is
    then handed to the parent initializer.

    NOTE: the default ``opt_logger`` is created once, when the function is
    defined, and is shared by every call that omits the argument.
    """
    search_spaces = RandomForestOptimizer.Params
    if task.task != "classification":
        space = search_spaces.regression_space
        model = ensemble.RandomForestRegressor()
    else:
        space = search_spaces.classification_space
        model = ensemble.RandomForestClassifier()
    super().__init__(model, task, space, scorer, opt_logger)
randomforestmodel.py 文件源码 项目:Supply-demand-forecasting 作者: LevinJ 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def setClf(self):
    """Store the random-forest regressor for this model in ``self.clf``."""
    # Earlier configuration, kept for reference:
    #   min_samples_split = 3
    #   RandomForestRegressor(n_estimators = 100, max_features = 0.3, min_samples_split =1, verbose=100, n_jobs=-1)
    forest_settings = {'n_estimators': 100, 'max_features': 0.8}
    self.clf = RandomForestRegressor(**forest_settings)


问题


面经


文章

微信
公众号

扫码关注公众号