python类RandomForestRegressor()的实例源码

machine_learning_example.py 文件源码 项目:POWER 作者: pennelise 项目源码 文件源码 阅读 39 收藏 0 点赞 0 评论 0
def machine_learning_RF(x_train,y_train,x_test,y_test):
    """Fit a random forest on the NaN-free training rows and predict the test rows.

    Training rows that contain a NaN in any feature column or in the target
    are dropped before fitting. Test rows that contain a NaN in any feature
    column are dropped before predicting. The fitted model's feature
    importances are printed.

    Parameters
    ----------
    x_train, x_test : 2-D numpy arrays (rows are samples, columns are features).
    y_train : numpy array of targets, one per training row
        (a flat vector or a single column).
    y_test : numpy array of targets, one per test row.

    Returns
    -------
    tuple
        ``(y_test[kept_test_rows], predicted_y)`` where ``predicted_y`` holds
        the predictions for the NaN-free test rows.
    """
    import numpy as np
    from sklearn.ensemble import RandomForestRegressor

    # Row masks are built with vectorised NumPy calls instead of the Python 2
    # builtin `reduce`, so the function runs on both Python 2 and Python 3.
    train_rows = (~np.isnan(x_train).any(axis=1)) & (~np.isnan(np.ravel(y_train)))
    inputs = x_train[train_rows,:]
    targets = y_train[train_rows]

    test_rows = ~np.isnan(x_test).any(axis=1)
    inputs_test = x_test[test_rows,:]

    # n_estimators is the number of trees in the forest.
    rfc_new = RandomForestRegressor(n_estimators=100,random_state=42,max_features=2)
    rfc_new = rfc_new.fit(inputs,targets)
    predicted_y = rfc_new.predict(inputs_test)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(rfc_new.feature_importances_)
    return y_test[test_rows], predicted_y
_random_forest_regressor.py 文件源码 项目:coremltools 作者: gsabran 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def convert(model, feature_names, target):
    """Convert a random forest regressor to protobuf format.

    Parameters
    ----------
    model : RandomForestRegressor
        A trained scikit-learn random forest regression model.

    feature_names: [str]
        Name of the input columns.

    target: str
        Name of the output column.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model, i.e. the result of
        `_convert_tree_ensemble` wrapped in `_MLModel`.

    Raises
    ------
    RuntimeError
        If scikit-learn is not available.
    """
    if not(_HAS_SKLEARN):
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    _sklearn_util.check_expected_type(model, _ensemble.RandomForestRegressor)
    def is_rf_model(m):
        # Read the attribute defensively: an estimator that was never fitted
        # has no `estimators_`, and calling len() on it first would raise
        # AttributeError instead of letting check_fitted report the problem.
        estimators = getattr(m, 'estimators_', None)
        if estimators is None or len(estimators) == 0:
            return False
        for t in estimators:
            if not hasattr(t, 'tree_') or t.tree_ is None:
                return False
        return True
    _sklearn_util.check_fitted(model, is_rf_model)
    return _MLModel(_convert_tree_ensemble(model, feature_names, target))
run_model_fit.py 文件源码 项目:time_series_modeling 作者: rheineke 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def persist_pipelines(pipelines):
    """Dump every pipeline to ``models/<pipeline name>-<yy-mm-dd>.pkl``.

    The ``models`` directory is created when missing. Each pipeline's name
    (from ``utils.pipeline_name``) is printed before it is written, and all
    files written by one call share the same date stamp.
    """
    Path('models').mkdir(exist_ok=True)
    stamp = dt.datetime.now()
    path_template = 'models/{}-{:%y-%m-%d}.pkl'
    for pipeline in pipelines:
        print(utils.pipeline_name(pipeline))
        target_path = path_template.format(utils.pipeline_name(pipeline), stamp)
        # joblib is used on purpose: the original author noted that plain
        # pickle fails on RandomForestRegressor.
        joblib.dump(pipeline, target_path)
modeltest.py 文件源码 项目:strategy 作者: kanghua309 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def model_cross_valid(X,Y):
    """Print the mean 10-fold cross-validation score of each candidate model.

    Every model class in the candidate list is instantiated with its default
    arguments and scored with ``neg_mean_squared_error``; the class and the
    mean score across folds are printed. Nothing is returned.

    Parameters
    ----------
    X : feature matrix passed to ``cross_val_score``.
    Y : target values passed to ``cross_val_score``.
    """
    # No `random_state` here: KFold only uses it when `shuffle=True`, and
    # recent scikit-learn rejects a seed on an unshuffled splitter. Dropping
    # it keeps the same (unshuffled, consecutive) folds as before.
    kfold = model_selection.KFold(n_splits=10)
    scoring = 'neg_mean_squared_error'
    # TODO: also try random forest, boosting, LSTM and GBDT. Wider candidate list:
    # [LinearRegression,Ridge,Lasso,ElasticNet,KNeighborsRegressor,DecisionTreeRegressor,
    #  SVR,RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor]
    for model_name in [LinearRegression,ElasticNet]:
        model = model_name()
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name,results.mean())
RegressionRandomForest.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def __init__(self, isTrain):
        """Initialise the parent class and create the random forest regressor.

        ``isTrain`` is forwarded unchanged to the parent constructor.
        """
        super(RegressionRandomForest, self).__init__(isTrain)

        # Data preprocessing (self.dataPreprocessing()) is currently disabled.

        # Random forest regressor used as this object's model.
        forest_settings = dict(n_estimators=32, max_depth=39, max_features='sqrt')
        self.model = RandomForestRegressor(**forest_settings)
PredictiveModel.py 文件源码 项目:nirdizati-runtime 作者: nirdizati 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def __init__(self, nr_events, case_id_col, encoder_kwargs, cls_kwargs, cls_method="rf"):
        """Store the configuration, build the sequence encoder and the regressor.

        ``encoder_kwargs`` are forwarded to ``SequenceEncoder`` and
        ``cls_kwargs`` to the regressor selected by ``cls_method``
        ("rf" for a random forest, "gbm" for gradient boosting).
        """
        self.nr_events = nr_events
        self.case_id_col = case_id_col

        self.encoder = SequenceEncoder(case_id_col=case_id_col, nr_events=nr_events, **encoder_kwargs)

        if cls_method == "rf":
            self.cls = RandomForestRegressor(**cls_kwargs)
        elif cls_method == "gbm":
            self.cls = GradientBoostingRegressor(**cls_kwargs)
        else:
            # No estimator is created on this path, so `self.cls` is not set.
            print("Classifier method not known")
RFfastestLap.py 文件源码 项目:f1_2017 作者: aflaisler 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def fastLapModel(xList, labels, names, multiple=0, full_set=0):
    """Train random forests of increasing size and plot their hold-out MSE.

    The data is split 70/30 with a fixed seed. When ``full_set`` is truthy the
    model is trained on all rows instead (the hold-out rows are still used
    for scoring). One forest is fitted per ensemble size in
    ``range(500 * m, 1000 * m, 100 * m)`` with ``m = 10 ** multiple``; the
    test MSE of each is plotted and the last one is printed.

    Returns
    -------
    tuple
        ``(xTrain, xTest, yTrain, yTest, RFmd)`` where ``RFmd`` is the last
        forest that was fitted.
    """
    X = numpy.array(xList)
    y = numpy.array(labels)
    featureNames = numpy.array(names)  # converted but not used further below

    # Fixed hold-out set: 30% of the rows.
    xTrain, xTest, yTrain, yTest = train_test_split(
        X, y, test_size=0.30, random_state=531)
    if full_set:
        # Final model (no CV): train on everything.
        xTrain, yTrain = X, y
    check_set(xTrain, xTest, yTrain, yTest)
    print("Fitting the model to the data set...")

    # Ensemble sizes to try, so the change in MSE can be observed.
    scale = 10 ** multiple
    nTreeList = range(500 * scale, 1000 * scale, 100 * scale)
    mseOos = []
    for iTrees in nTreeList:
        maxFeat = int(np.sqrt(np.shape(xTrain)[1])) + 1  # try tweaking
        RFmd = ensemble.RandomForestRegressor(
            n_estimators=iTrees, max_depth=None, max_features=maxFeat,
            oob_score=False, random_state=531, n_jobs=-1)
        RFmd.fit(xTrain, yTrain)

        # Record the out-of-sample error for this ensemble size.
        prediction = RFmd.predict(xTest)
        mseOos.append(mean_squared_error(yTest, prediction))

    # Test error versus number of trees in the ensemble.
    plot.plot(nTreeList, mseOos)
    plot.xlabel('Number of Trees in Ensemble')
    plot.ylabel('Mean Squared Error')
    plot.show()
    print("MSE")
    print(mseOos[-1])
    return xTrain, xTest, yTrain, yTest, RFmd
model.py 文件源码 项目:CryptoBot 作者: AdeelMufti 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def fit_forest(X, y, window=100000, estimators=100,
               samples_leaf=250, validate=True):
    '''
    Builds a random forest regressor and either cross-validates or fits it.

    With ``validate`` truthy the unfitted model is handed to
    ``cross_validate(X, y, model, window)`` and that result is returned;
    otherwise the model fitted on ``X``, ``y`` is returned.
    '''
    forest = RandomForestRegressor(n_jobs=-1,
                                   random_state=42,
                                   min_samples_leaf=samples_leaf,
                                   n_estimators=estimators)
    if not validate:
        return forest.fit(X, y)
    return cross_validate(X, y, forest, window)
RandomForest.py 文件源码 项目:pyGPGO 作者: hawk31 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def __init__(self, **params):
        """
        Wrapper around sklearn's Random Forest implementation for pyGPGO.
        Random Forests can also be used for surrogate models in Bayesian Optimization.
        An estimate of 'posterior' variance can be obtained by using the `impurity`
        criterion value in each subtree.

        Parameters
        ----------
        params: dict, optional
            Keyword arguments intended for `RandomForestRegressor`. They are
            collected into a dict and only stored here; nothing is validated
            or constructed in this method. With no arguments the dict is
            empty, which presumably leaves sklearn's defaults in effect
            (confirm where the estimator is actually built).

        """
        # Keep the keyword arguments exactly as given for later use.
        self.params = params
RF_solver.py 文件源码 项目:tpai_comp 作者: luuuyi 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def generate_RF_model(file_name):
    """Train a random forest regressor on selected columns and save it.

    The data frame returned by ``read_from_file(file_name)`` is reduced to the
    columns whose names match the regex below. Column 0 of the resulting
    matrix is used as the target and the remaining columns as features
    (assumes ``label`` is the first matching column in the file; verify
    against the data). Progress and the training time in whole seconds are
    printed, and the fitted model is written to ``RF.model`` with joblib.

    Returns
    -------
    The fitted ``RandomForestRegressor``.
    """
    train_df = read_from_file(file_name)
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby|hometown|residence')
    # `.values` works on old and new pandas; `as_matrix()` was removed.
    train_np = selected_train_df.values
    y = train_np[:,0]
    X = train_np[:,1:]
    print('Train Random Forest Regression Model...')
    start_time  = datetime.datetime.now()
    rf = RandomForestRegressor(n_estimators=25, n_jobs=-1)#, class_weight='balanced')
    rf.fit(X,y)
    end_time = datetime.datetime.now()
    print('Training Done..., Time Cost: ')
    # total_seconds() includes whole days, which the `.seconds` field drops.
    print(int((end_time - start_time).total_seconds()))

    print('Save Model...')
    joblib.dump(rf, 'RF.model')
    return rf
rf.py 文件源码 项目:SMAC3 作者: automl 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def rf_from_cfg(cfg, seed):
    """
        Builds a sklearn random forest regressor from a configuration and
        scores it with cross-validation on the boston data. This is the
        function-call being optimized; the chosen hyper-parameter values are
        read from the configuration (cfg).

        Parameters:
        -----------
        cfg: Configuration
            configuration chosen by smac
        seed: int or RandomState
            used to initialize the rf's random generator

        Returns:
        -----------
        float
            mean over the 11 cv-folds of the root mean square error of the
            random-forest test predictions
    """
    def rmse(y, y_pred):
        squared_error = (y_pred - y)**2
        return np.sqrt(np.mean(squared_error))

    forest_settings = {
        "n_estimators": cfg["num_trees"],
        "criterion": cfg["criterion"],
        "min_samples_split": cfg["min_samples_to_split"],
        "min_samples_leaf": cfg["min_samples_in_leaf"],
        "min_weight_fraction_leaf": cfg["min_weight_frac_leaf"],
        "max_features": cfg["max_features"],
        "max_leaf_nodes": cfg["max_leaf_nodes"],
        "bootstrap": cfg["do_bootstrapping"],
    }
    rfr = RandomForestRegressor(random_state=seed, **forest_settings)

    # Scorer for sklearn's cross-validation; lower RMSE is better, so the
    # scorer reports it sign-flipped.
    rmse_scorer = make_scorer(rmse, greater_is_better=False)
    fold_scores = cross_val_score(rfr, boston.data, boston.target, cv=11, scoring=rmse_scorer)
    # Undo the sign flip applied by the scorer.
    return -np.mean(fold_scores)
random_forest.py 文件源码 项目:guacml 作者: guacml 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def train(self, x, y,
              n_estimators=10,
              max_depth=None,
              min_samples_leaf=1):
        """Build the forest matching ``self.problem_type`` and fit it on x, y.

        The hyper-parameters are first normalised with ``self.to_int`` /
        ``self.pos_int``. A classifier is used for binary classification and
        a regressor for regression; any other problem type raises
        ``NotImplementedError``. The fitted estimator is kept in
        ``self.model``.
        """
        n_estimators = self.to_int(n_estimators)
        max_depth = self.to_int(max_depth)
        min_samples_leaf = self.pos_int(min_samples_leaf)

        if self.problem_type == ProblemType.BINARY_CLAS:
            forest_class = RandomForestClassifier
        elif self.problem_type == ProblemType.REGRESSION:
            forest_class = RandomForestRegressor
        else:
            raise NotImplementedError('Problem type {0} not implemented'.format(self.problem_type))

        self.model = forest_class(n_estimators,
                                  max_depth=max_depth,
                                  min_samples_leaf=min_samples_leaf)
        self.model.fit(x, y)
models.py 文件源码 项目:sanergy-public 作者: dssg 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def define_model(self):
        """Return a new, unfitted estimator for ``self.modeltype``.

        Supported types are "RandomForest", "LinearRegression", "Lasso",
        "ElasticNet" and "SVR"; their hyper-parameters are read from
        ``self.parameters``. Any other type raises ``ConfigError``.
        """
        # Previously sketched but disabled model types: AR, StaticModel,
        # AdvancedStaticModel, SGDRegressor.
        kind = self.modeltype

        if kind == "RandomForest":
            return ensemble.RandomForestRegressor(
                n_estimators=self.parameters['n_estimators'])

        if kind == "LinearRegression":
            return linear_model.LinearRegression()

        if kind == "Lasso":
            return linear_model.Lasso(alpha=self.parameters['alpha'])

        if kind == "ElasticNet":
            return linear_model.ElasticNet(
                alpha=self.parameters['alpha'],
                l1_ratio=self.parameters['l1_ratio'])

        if kind == "SVR":
            return SVR(
                C=self.parameters['C'],
                epsilon=self.parameters['epsilon'],
                kernel=self.parameters['kernel'])

        raise ConfigError("Unsupported model {0}".format(self.modeltype))
utils.py 文件源码 项目:fluentopt 作者: mehdidc 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def predict(self, X, return_std=False):
        """Predict for X, optionally with the spread across the trees.

        Without ``return_std`` the parent class prediction is returned. With
        it, every tree in ``self.estimators_`` predicts X and the per-sample
        mean and standard deviation over the trees are returned as a
        ``(mean, std)`` pair.
        """
        if not return_std:
            return super(RandomForestRegressor, self).predict(X)

        # One leading axis per tree so the predictions stack along axis 0.
        per_tree = [est.predict(X)[np.newaxis, :] for est in self.estimators_]
        stacked = np.concatenate(per_tree, axis=0)
        return stacked.mean(axis=0), stacked.std(axis=0)
prepare_data.py 文件源码 项目:datasciences 作者: BenChehade 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def greedy_elim(df):
    """Select features for predicting ``SalePrice`` by recursive elimination.

    Every column of ``df`` except ``SalePrice`` is a candidate feature. A
    gradient boosting regressor drives scikit-learn's ``RFE``, which removes
    one feature per step until 150 remain.

    Parameters
    ----------
    df : pandas DataFrame containing a ``SalePrice`` column.

    Returns
    -------
    numpy array with the names of the selected columns.
    """
    X = df[[x for x in df.columns if x!='SalePrice']]
    y = df['SalePrice']
    # Alternative estimator tried earlier: RandomForestRegressor(n_estimators=50)
    model = GradientBoostingRegressor(n_estimators=50, learning_rate=0.05)
    # 150 features seems to be the best at the moment. Why this is is unclear.
    feat_selector = RFE(estimator=model, step=1, n_features_to_select=150)

    # Only the fitted selector is needed, so fit() replaces fit_transform().
    # `.values` replaces DataFrame.as_matrix(), which newer pandas removed.
    feat_selector.fit(X.values, y.values)

    # Boolean mask over the candidate columns: True where a feature was kept.
    features_bool = np.array(feat_selector.support_)
    return np.array(X.columns)[features_bool]
modeltest.py 文件源码 项目:strategy 作者: kanghua309 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    """Fit each candidate model and print/plot diagnostics of its test residuals.

    For every model class in the candidate list (instantiated with default
    arguments) this fits on the training data, then on the test data:
    prints the mean squared residual, shows a residual-vs-prediction scatter
    plot, prints ``model.score``, and prints the p-values of the Jarque-Bera
    (normality), Breusch-Pagan (heteroskedasticity) and Ljung-Box
    (autocorrelation, 10 lags) tests. Nothing is returned.
    """
    # Imported once, up front, instead of on every loop iteration.
    from statsmodels.stats.stattools import jarque_bera
    import statsmodels.api as sms
    import statsmodels.stats.diagnostic as smd

    # Prefer the correctly spelled Breusch-Pagan function and fall back to
    # the misspelled legacy name that older statsmodels releases expose.
    het_test = getattr(smd, 'het_breuschpagan', None)
    if het_test is None:
        het_test = smd.het_breushpagan

    # Wider candidate list: [LinearRegression, Ridge, Lasso, ElasticNet,
    # KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor,
    # AdaBoostRegressor, GradientBoostingRegressor]
    for model_name in [LinearRegression, ElasticNet]:
        model = model_name()
        model.fit(TrainX,TrainY)
        print(model_name)
        # Predict once and reuse the result for residuals and the plot.
        predicted = model.predict(TestX)
        resid = predicted - TestY
        # NOTE: the printed value is the mean of the squared residuals.
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        plt.scatter(predicted, resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = het_test(resid, xs_with_constant)
        print ("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        # NOTE(review): indexing with [1] assumes the tuple return
        # (statistics, p-values); confirm for the installed statsmodels.

        # Single-argument print calls give the same text on Python 2 and 3.
        print("Test Autocorrelation P-values: %s" % (ljung_box[1],))
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
RandomForest.py 文件源码 项目:pyGPGO 作者: hawk31 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def __init__(self, **params):
        """
        Wrapper around sklearn's ExtraTreesRegressor implementation for pyGPGO.
        Random Forests can also be used for surrogate models in Bayesian Optimization.
        An estimate of 'posterior' variance can be obtained by using the `impurity`
        criterion value in each subtree.

        Parameters
        ----------
        params: dict, optional
            Keyword arguments for the wrapped sklearn estimator. They are
            collected into a dict and only stored here; nothing is validated
            or constructed in this method.
            NOTE(review): this text previously named `RandomForestRegressor`
            while the summary names ExtraTreesRegressor; confirm which
            estimator receives these parameters.

        """
        # Keep the keyword arguments exactly as given for later use.
        self.params = params
model_comparison.py 文件源码 项目:DSI-personal-reference-kit 作者: teb311 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def random_forest_grid_search():
    """Return a hyper-parameter grid and a default RandomForestRegressor.

    Returns
    -------
    tuple
        ``(grid, estimator)``: a dict mapping parameter names to the lists of
        candidate values, and an estimator built with default arguments.
    """
    estimator = RandomForestRegressor()
    # NOTE(review): newer scikit-learn releases may no longer accept 'auto'
    # for max_features; confirm against the installed version.
    search_space = dict(
        n_estimators=[50, 100, 1000],
        max_features=['sqrt', 'log2', 'auto'],
        min_samples_split=[2, 4],
        min_samples_leaf=[1, 2],
    )
    return search_space, estimator
_random_forest_regressor.py 文件源码 项目:coremltools 作者: apple 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def convert(model, feature_names, target):
    """Convert a random forest regressor to protobuf format.

    Parameters
    ----------
    model : RandomForestRegressor
        A trained scikit-learn random forest regression model.

    feature_names: [str]
        Name of the input columns.

    target: str
        Name of the output column.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model, i.e. the result of
        `_convert_tree_ensemble` wrapped in `_MLModel`.

    Raises
    ------
    RuntimeError
        If scikit-learn is not available.
    """
    if not(_HAS_SKLEARN):
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    _sklearn_util.check_expected_type(model, _ensemble.RandomForestRegressor)
    def is_rf_model(m):
        # Read the attribute defensively: an estimator that was never fitted
        # has no `estimators_`, and calling len() on it first would raise
        # AttributeError instead of letting check_fitted report the problem.
        estimators = getattr(m, 'estimators_', None)
        if estimators is None or len(estimators) == 0:
            return False
        for t in estimators:
            if not hasattr(t, 'tree_') or t.tree_ is None:
                return False
        return True
    _sklearn_util.check_fitted(model, is_rf_model)
    return _MLModel(_convert_tree_ensemble(model, feature_names, target))
test_random_forest_regression.py 文件源码 项目:coremltools 作者: apple 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def setUpClass(self):
        """
        Prepare the fixture: load the boston dataset and fit a seeded random
        forest regressor on it, keeping both on the receiver for the tests.
        """
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.datasets import load_boston

        boston = load_boston()
        forest = RandomForestRegressor(random_state = 1)
        forest.fit(boston['data'], boston['target'])

        # Expose the fitted model and the data it was trained on.
        self.scikit_model = forest
        self.scikit_data = boston


问题


面经


文章

微信
公众号

扫码关注公众号