python类RandomForestClassifier()的实例源码

demo_mnist.py 文件源码 项目:gcForest 作者: kingfengji 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def get_toy_config():
    """Build the toy cascade configuration.

    Returns:
        dict: ``{"cascade": {...}}`` where the cascade section holds the
        random seed, the layer / early-stopping limits, the number of
        classes and the list of estimator specifications. Every estimator
        spec uses 5 folds.
    """
    estimators = [
        {
            "n_folds": 5,
            "type": "XGBClassifier",
            "n_estimators": 10,
            "max_depth": 5,
            "objective": "multi:softprob",
            "silent": True,
            "nthread": -1,
            "learning_rate": 0.1,
        },
    ]
    # The two tree ensembles share every setting except their type.
    for forest_type in ("RandomForestClassifier", "ExtraTreesClassifier"):
        estimators.append({
            "n_folds": 5,
            "type": forest_type,
            "n_estimators": 10,
            "max_depth": None,
            "n_jobs": -1,
        })
    estimators.append({"n_folds": 5, "type": "LogisticRegression"})

    cascade = {
        "random_state": 0,
        "max_layers": 100,
        "early_stopping_rounds": 3,
        "n_classes": 10,
        "estimators": estimators,
    }
    return {"cascade": cascade}
ClassificationRandomForest.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
    """Grid-search random forest hyper-parameters and print a report.

    Runs a 5-fold ``GridSearchCV`` (scored with ``precision_weighted``) over
    ``max_depth``, ``n_estimators`` and ``max_features`` using
    ``self.X_train`` / ``self.y_train``, then prints the best parameters,
    the score of every grid point and a classification report computed on
    ``self.X_test`` / ``self.y_test``.

    Returns:
        None. All results are written to standard output.
    """
    # Set the parameters by cross-validation
    tuned_parameters = [{'max_depth': range(20,60),
                         'n_estimators': range(10,40),
                         'max_features': ['sqrt', 'log2', None]
                         }
                        ]

    clf = GridSearchCV(RandomForestClassifier(n_estimators=30), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    # Single-argument print(...) calls produce the same output whether the
    # interpreter treats print as a statement or as a function.
    print("Best parameters set found on development set:\n")
    print(clf.best_params_)

    print("Grid scores on development set:\n")
    # NOTE(review): grid_scores_ only exists in older scikit-learn releases
    # (newer ones expose cv_results_ instead) -- confirm the pinned version.
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

    print("Detailed classification report:\n")
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print(classification_report(y_true, y_pred))
SentiCR.py 文件源码 项目:SentiCR 作者: senticr 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def get_classifier(self):
    """Create the estimator selected by ``self.algo``.

    Recognised codes are "GBT", "RF", "ADB", "DT", "NB", "SGD", "SVC" and
    "MLPC"; each maps to a freshly constructed classifier.

    Returns:
        A new classifier instance, or ``0`` when ``self.algo`` matches none
        of the recognised codes.
    """
    selected = self.algo

    def build_mlp():
        return MLPClassifier(activation='logistic',  batch_size='auto',
                             early_stopping=True, hidden_layer_sizes=(100,),
                             learning_rate='adaptive', learning_rate_init=0.1,
                             max_iter=5000, random_state=1, solver='lbfgs',
                             tol=0.0001, validation_fraction=0.1,
                             verbose=False, warm_start=False)

    # Factories are wrapped in lambdas so that only the chosen class is
    # looked up and instantiated, exactly like a chain of if/elif branches.
    factories = (
        ("GBT", lambda: GradientBoostingClassifier()),
        ("RF", lambda: RandomForestClassifier()),
        ("ADB", lambda: AdaBoostClassifier()),
        ("DT", lambda: DecisionTreeClassifier()),
        ("NB", lambda: BernoulliNB()),
        ("SGD", lambda: SGDClassifier()),
        ("SVC", lambda: LinearSVC()),
        ("MLPC", build_mlp),
    )
    for code, make in factories:
        if selected == code:
            return make()
    return 0
with_csv.py 文件源码 项目:stock_trend_prediction 作者: r12543 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def performRFClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Random Forest Binary Classification

    Fits a 100-tree random forest (using all cores via ``n_jobs=-1``) on the
    training split and returns the value of ``score`` on the test split.

    ``fout`` and ``savemodel`` are accepted but not used here: pickling the
    fitted model to a '<fout>-<timestamp>.pickle' file is currently disabled.
    """
    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    forest.fit(X_train, y_train)
    return forest.score(X_test, y_test)
sp_start.py 文件源码 项目:stock_trend_prediction 作者: r12543 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def performRFClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Random Forest Binary Classification

    Fits a 100-tree random forest (using all cores via ``n_jobs=-1``) on the
    training split and prints its score on the test split as ``RF:  <score>``.

    ``fout`` and ``savemodel`` are accepted but not used here: pickling the
    fitted model to a '<fout>-<timestamp>.pickle' file is currently disabled.

    Returns:
        None. The accuracy is only printed.
    """
    clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    clf.fit(X_train, y_train)

    accuracy = clf.score(X_test, y_test)

    # A single pre-formatted argument keeps the output identical to the old
    # `print "RF: ", accuracy` statement while also being valid Python 3.
    print("RF:  %s" % (accuracy,))
TMDetection.py 文件源码 项目:US-TransportationMode 作者: vlomonaco 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def random_forest(self, sensors_set):
    """Train and evaluate a random forest on the features of ``sensors_set``.

    The feature list comes from ``self.dataset``; the classifier is fitted on
    the training split, evaluated on the test split, and a table with the
    accuracy and the per-feature importances (sorted by decreasing
    importance) is written as CSV into ``const.DIR_RESULTS``.

    Args:
        sensors_set: identifier passed to the dataset helpers; its string
            form is also used as the prefix of the output file name.

    Returns:
        None. Progress and accuracy are printed; results go to the CSV file.
    """
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("RANDOM FOREST.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)
    classifier_forest = RandomForestClassifier(n_estimators=const.PAR_RF_ESTIMATOR)
    classifier_forest.fit(train_features, train_classes)
    test_prediction = classifier_forest.predict(test_features)
    acc = accuracy_score(test_classes, test_prediction)
    # The scalar accuracy is repeated on every row next to each feature.
    df_feature = pd.DataFrame(
        {'accuracy': acc, 'featureName': features, 'importance': classifier_forest.feature_importances_})
    df_feature = df_feature.sort_values(by='importance', ascending=False)
    print("ACCURACY : " + str(acc))
    print("END RANDOM FOREST")

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(const.DIR_RESULTS, exist_ok=True)
    df_feature.to_csv(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_RANDOM_FOREST_RESULTS, index=False)

    # neural network algorithm: trained on the whole train set and tested on the whole test set
test_random_forest_classifier.py 文件源码 项目:coremltools 作者: apple 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def setUpClass(self):
    """
    Prepare the shared fixtures: load the dataset, derive class labels from
    its continuous target, and fit a random forest classifier on them.
    """
    import numpy as np
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestClassifier

    dataset = load_boston()

    # Turn the continuous target into class labels: each value is replaced
    # by the (zero-based) index of the histogram bin edge it falls after.
    raw_target = dataset.target
    bin_edges = np.histogram(raw_target)[1]
    labels = np.digitize(raw_target, bin_edges) - 1

    forest = RandomForestClassifier(random_state = 1)
    forest.fit(dataset.data, labels)

    # Keep the data, the labels and the fitted model for the tests.
    self.scikit_data = dataset
    self.target = labels
    self.scikit_model = forest
test_io_types.py 文件源码 项目:coremltools 作者: apple 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_random_forest_classifier(self):
    """Check scikit-learn and converted-model predictions for every dtype.

    For each dtype in ``self.number_data_type`` the data is cast to that
    dtype, the target is binarised against its mean, a random forest is set
    up through ``self._sklearn_setup`` and the first sample's prediction is
    compared between the scikit-learn model and the converted model.
    A ``RuntimeError`` during the comparison is reported as an unsupported
    dtype instead of failing the test.
    """
    for dtype in self.number_data_type.keys():
        scikit_model = RandomForestClassifier(random_state=1)
        data = self.scikit_data['data'].astype(dtype)
        # Cast the target once and reuse it for the mean threshold.
        typed_target = self.scikit_data['target'].astype(dtype)
        target = typed_target > typed_target.mean()
        scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
        test_data = data[0].reshape(1, -1)
        self._check_tree_model(spec, 'multiArrayType', 'int64Type', 2)
        coreml_model = create_model(spec)
        try:
            # Predict once per model and reuse the values in the message, so
            # the reported values are exactly the ones that were compared.
            expected = scikit_model.predict(test_data)[0]
            actual = bool(int(coreml_model.predict({'data': test_data})['target']))
            self.assertEqual(expected, actual,
                             msg="{} != {} for Dtype: {}".format(expected, actual, dtype))
        except RuntimeError:
            print("{} not supported. ".format(dtype))
forest.py 文件源码 项目:cgpm 作者: probcomp 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def __init__(self, outputs, inputs, k=None, hypers=None, params=None,
        distargs=None, rng=None):
    """Initialise the random-forest model state.

    Args:
        outputs: sequence with exactly one output index, which must not
            appear in ``inputs``.
        inputs: sequence with at least one input index.
        k: number of output categories; when None it is read from
            ``distargs['k']``.
        hypers: accepted but not used in this constructor.
        params: optional dict; ``'alpha'`` (default 0.1) and ``'forest'``
            (a ready-made classifier, default None) are read from it.
        distargs: dict providing ``['inputs']['stattypes']`` (one entry per
            input) and, when ``k`` is None, ``['k']``. Although it defaults
            to None it is indexed unconditionally, so it must be supplied.
        rng: random state; a new one is generated via ``gu.gen_rng()`` when
            None.
    """
    # Initialise the random state exactly once (the previous code repeated
    # this assignment and generated a second, discarded-first rng).
    self.rng = gu.gen_rng() if rng is None else rng
    self.outputs = outputs
    self.inputs = inputs
    assert len(self.outputs) == 1
    assert len(self.inputs) >= 1
    assert self.outputs[0] not in self.inputs
    assert len(distargs['inputs']['stattypes']) == len(self.inputs)
    self.stattypes = distargs['inputs']['stattypes']
    # Number of output categories and input dimension.
    # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
    self.k = k if k is not None else int(distargs['k'])
    self.p = len(distargs['inputs']['stattypes'])
    # Sufficient statistics.
    self.N = 0
    self.data = Data(x=OrderedDict(), Y=OrderedDict())
    self.counts = [0] * self.k
    # Outlier and random forest parameters.
    if params is None:
        params = {}
    self.alpha = params.get('alpha', .1)
    self.regressor = params.get('forest', None)
    if self.regressor is None:
        self.regressor = RandomForestClassifier(random_state=self.rng)
random_forest.py 文件源码 项目:MLAB_Intuit 作者: rykard95 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def rf_categorize(email):
    """Predict the category of ``email`` with a freshly trained random forest.

    The training corpus is read from the local database: every record's
    'Text' field is a document and the name of the collection it lives in is
    its label. Documents are TF-IDF vectorised, a forest with
    ``int(sqrt(n_features)) + 1`` trees is fitted, and the predicted label
    for ``email`` is returned.
    """
    # Gather (label, text) pairs from every collection.
    db = utils.get_local_db()
    emails = [
        [collection, record['Text']]
        for collection in db.collection_names()
        for record in db.get_collection(collection).find()
    ]
    labels = [pair[0] for pair in emails]
    data = [pair[1] for pair in emails]

    # Vectorise the corpus as a dense matrix, then the incoming email.
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(data).toarray()
    email_vector = vectorizer.transform([email])

    # Forest size grows with the square root of the number of features.
    n_trees = int(sqrt(len(X[0]))) + 1
    forest = RandomForestClassifier(n_estimators = n_trees)
    forest.fit(X, labels)
    predictions = forest.predict(email_vector)
    return predictions[0]
utils.py 文件源码 项目:magic 作者: pan-webis-de 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def get_classifier(method='logistic_regression'):
    """Return a preconfigured, unfitted classifier for ``method``.

    Supported values are 'logistic_regression' (the default),
    'random_forest' and 'gradient_boosting'. Any other value yields None.
    """
    if method == 'logistic_regression':
        model = LogisticRegression(C=1e3,
                                   tol=0.01,
                                   multi_class='ovr',
                                   solver='liblinear',
                                   n_jobs=-1,
                                   random_state=123)
    elif method == 'random_forest':
        model = RandomForestClassifier(n_estimators=250,
                                       bootstrap=False,
                                       n_jobs=-1,
                                       random_state=123)
    elif method == 'gradient_boosting':
        model = xgb.XGBClassifier(max_depth=10,
                                  subsample=0.7,
                                  n_estimators=500,
                                  min_child_weight=0.05,
                                  colsample_bytree=0.3,
                                  learning_rate=0.1)
    else:
        # Unknown method names are not an error here; callers get None.
        model = None
    return model
TitanicML.py 文件源码 项目:ML_lessons 作者: supcom-machine-learning 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def applyRandomForestClassifier(self, train, test):
    """Fit a random forest on ``train`` and label ``test`` with it.

    Every column of ``train`` except "PassengerId" and "Survived" is used as
    a feature and "Survived" is the target. Predictions are made from every
    column of ``test`` except "PassengerId".

    Side effects:
        Writes a message through ``self.writeMessage``, prints the score on
        the training data, and adds a "Survived" column to ``test`` in place.

    Returns:
        The same ``test`` object, now carrying the predicted "Survived"
        column.
    """
    # init algorithm
    RFC = RandomForestClassifier()

    # training target: selected as a 1-D column (not a one-column frame),
    # which is the target shape scikit-learn estimators expect.
    y_train = train["Survived"]
    x_train = train[train.columns.difference(["PassengerId","Survived"])]

    # fitting
    RFC.fit(x_train, y_train)

    result = RFC.predict(test[test.columns.difference(["PassengerId"])])

    self.writeMessage("current training score")
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(RFC.score(x_train, y_train))

    test["Survived"] = result

    return test
classifier.py 文件源码 项目:stock-price-prediction 作者: chinuy 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data

    Args:
        dataset: table whose last column is excluded from the features and
            which provides the 'UpDown' target column.
        method: one of 'RNN', 'RF', 'KNN', 'SVM' or 'ADA'.
        parameters: only read for 'SVM', where ``parameters[0]`` is ``C``
            and ``parameters[1]`` is ``gamma``.

    Returns:
        For 'RNN', whatever ``performRNNlass`` returns; otherwise the result
        of calling ``fit`` on the selected classifier.

    Raises:
        ValueError: if ``method`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError on ``clf``).
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        # The RNN helper does its own training, so return its result as is.
        return performRNNlass(dataset[features], dataset['UpDown'])

    if method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()
    else:
        raise ValueError(
            "Unknown method %r; expected one of 'RNN', 'RF', 'KNN', 'SVM', 'ADA'"
            % (method,))

    return clf.fit(dataset[features], dataset['UpDown'])
Stock_Prediction_Model_Random_Forrest.py 文件源码 项目:StockRecommendSystem 作者: doncat99 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def build_model(self, X_train, y_train):
    """Return a previously saved model or configure a new random forest.

    When ``self.paras.load`` equals True, a saved model for
    ``self.paras.window_len`` is requested from ``self.load_training_model``
    and returned if one exists. Otherwise the search ranges are read from
    ``self.paras``, ``self.best_window`` picks the number of trees and the
    maximum number of features, and a new ``RandomForestClassifier`` with
    those settings is returned (it is not fitted in this method).
    """
    # Explicit comparison kept on purpose: only values equal to True
    # trigger the load attempt.
    if self.paras.load == True:
        model = self.load_training_model(self.paras.window_len)
        if model is not None:
            return model

    print('build Random Forrest model...')

    # NOTE(review): `index` is not defined in this method -- presumably a
    # module-level name; confirm, otherwise the lookups below raise NameError.
    # range of number of trees : 5*(1 -> 10) = 5,10,...,50 trees
    t_min = self.paras.tree_min[index]
    t_max = self.paras.tree_max[index]
    # range of max of features : 1 -> 10 features
    f_min = self.paras.feature_min[index]
    f_max = self.paras.feature_max[index]
    # range of window : 1 -> 70 days
    w_min = self.paras.window_min
    w_max = self.paras.window_max

    # The optimal window returned first is not needed to build the model.
    _, n_opt, m_opt = self.best_window(X_train, y_train, w_min,w_max,t_min,t_max,f_min,f_max)
    return RandomForestClassifier(n_estimators=n_opt,max_features=m_opt, n_jobs=8, verbose=self.paras.verbose)
models.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def __init__(self, info, verbose=True, debug_mode=False):
    """Pick and configure the underlying model from the dataset ``info``.

    Args:
        info: dict read for 'label_num', 'target_num', 'task' and 'metric',
            plus 'is_sparse' and (for non-regression tasks)
            'has_categorical' to choose the model family.
        verbose: forwarded to the chosen estimator's ``verbose`` argument.
        debug_mode: when >= 2 a ``RandomPredictor`` is used and no other
            model is built.

    Sets ``self.name``, ``self.model`` and ``self.predict_method`` (``predict``
    for regression, ``predict_proba`` otherwise), and wraps the model in a
    ``MultiLabelEnsemble`` for 'multilabel.classification' tasks.
    """
    self.label_num=info['label_num']
    self.target_num=info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    # To calibrate proba (balance=True is the alternative setting).
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)
    if debug_mode>=2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return
    if info['task']=='regression':
        if info['is_sparse']==True:
            self.name = "BaggingRidgeRegressor"
            self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1,  max_depth=4, min_samples_split=14, verbose=verbose, warm_start = True)
        self.predict_method = self.model.predict # Regressors expose predict only
    else:
        if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
            self.name = "RandomForestClassifier"
            self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingClassifier"
            # Constructed directly instead of eval()-ing a string assembled
            # from the name and str(verbose): same arguments, no eval.
            self.model = GradientBoostingClassifier(n_estimators=1, verbose=verbose, random_state=1, warm_start=True)
        if info['task']=='multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        self.predict_method = self.model.predict_proba
rf.py 文件源码 项目:hyperband 作者: zygmuntz 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def try_params( n_iterations, params ):
    """Train and evaluate a random forest sized by ``n_iterations``.

    The number of trees is ``n_iterations * trees_per_iteration`` rounded to
    the nearest integer; ``params`` supplies the remaining keyword arguments
    of the ``RF`` constructor. The tree count and ``params`` are printed
    before training.

    Returns:
        Whatever ``train_and_eval_sklearn_classifier`` returns for the
        classifier and the module-level ``data``.
    """
    n_estimators = int( round( n_iterations * trees_per_iteration ))
    # One pre-formatted argument gives the same text as the old
    # `print "n_estimators:", n_estimators` statement and runs on Python 3.
    print( "n_estimators: %s" % ( n_estimators, ))
    pprint( params )

    clf = RF( n_estimators = n_estimators, verbose = 0, n_jobs = -1, **params )

    return train_and_eval_sklearn_classifier( clf, data )
classifier_utils.py 文件源码 项目:human-rl 作者: gsastry 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a random forest wrapped in ``SKClassifier`` and print its scores.

    Prints the 0-1 score on the train and test sets followed by the
    wrapper's metrics on the test set; when ``show_mistakes`` is true,
    ``clf.show_mistakes`` is also invoked on the test set with a count of 10.

    NOTE: ``n_estimators`` and ``max_features`` are accepted but not
    forwarded; the forest is always built with n_estimators=10,
    max_features=20 and max_depth=10.

    Returns:
        The ``SKClassifier`` wrapper that was fitted.
    """
    clf = SKClassifier(RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    forest_fit = clf.fit(X_train, Y_train)

    # Predict the test set first, then score train and test in that order.
    test_pred = forest_fit.predict(X_test)
    train_score = zero_one_score(Y_train, forest_fit.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    met = clf.metrics(X_test, Y_test)
    if show_mistakes:
        clf.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', met)
    return clf
classifier_utils.py 文件源码 项目:human-rl 作者: gsastry 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a random forest wrapped in ``SKClassifier`` and print its scores.

    Prints the 0-1 score on the train and test sets followed by the
    wrapper's metrics on the test set; when ``show_mistakes`` is true,
    ``clf.show_mistakes`` is also invoked on the test set with a count of 10.

    NOTE: ``n_estimators`` and ``max_features`` are accepted but not
    forwarded; the forest is always built with n_estimators=10,
    max_features=20 and max_depth=10.

    Returns:
        The ``SKClassifier`` wrapper that was fitted.
    """
    clf = SKClassifier(RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    forest_fit = clf.fit(X_train, Y_train)

    # Predict the test set first, then score train and test in that order.
    test_pred = forest_fit.predict(X_test)
    train_score = zero_one_score(Y_train, forest_fit.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    met = clf.metrics(X_test, Y_test)
    if show_mistakes:
        clf.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', met)
    return clf
classifier_utils.py 文件源码 项目:human-rl 作者: gsastry 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a random forest wrapped in ``SKClassifier`` and print its scores.

    Prints the 0-1 score on the train and test sets followed by the
    wrapper's metrics on the test set; when ``show_mistakes`` is true,
    ``clf.show_mistakes`` is also invoked on the test set with a count of 10.

    NOTE: ``n_estimators`` and ``max_features`` are accepted but not
    forwarded; the forest is always built with n_estimators=10,
    max_features=20 and max_depth=10.

    Returns:
        The ``SKClassifier`` wrapper that was fitted.
    """
    clf = SKClassifier(RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    forest_fit = clf.fit(X_train, Y_train)

    # Predict the test set first, then score train and test in that order.
    test_pred = forest_fit.predict(X_test)
    train_score = zero_one_score(Y_train, forest_fit.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    met = clf.metrics(X_test, Y_test)
    if show_mistakes:
        clf.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', met)
    return clf
classifier_utils.py 文件源码 项目:human-rl 作者: gsastry 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a random forest wrapped in ``SKClassifier`` and print its scores.

    Prints the 0-1 score on the train and test sets followed by the
    wrapper's metrics on the test set; when ``show_mistakes`` is true,
    ``clf.show_mistakes`` is also invoked on the test set with a count of 10.

    NOTE: ``n_estimators`` and ``max_features`` are accepted but not
    forwarded; the forest is always built with n_estimators=10,
    max_features=20 and max_depth=10.

    Returns:
        The ``SKClassifier`` wrapper that was fitted.
    """
    clf = SKClassifier(RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    forest_fit = clf.fit(X_train, Y_train)

    # Predict the test set first, then score train and test in that order.
    test_pred = forest_fit.predict(X_test)
    train_score = zero_one_score(Y_train, forest_fit.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    met = clf.metrics(X_test, Y_test)
    if show_mistakes:
        clf.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', met)
    return clf


问题


面经


文章

微信
公众号

扫码关注公众号