from sklearn import datasets
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

def trained_models():
    # Fit a set of baseline classifiers on the breast-cancer dataset.
    dataset = datasets.load_breast_cancer()
    X = dataset.data
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12345)
    rf = RandomForestClassifier()
    rf.fit(X_train, y_train)
    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    svc_w_linear_kernel = SVC(kernel='linear')
    svc_w_linear_kernel.fit(X_train, y_train)
    svc_wo_linear_kernel = SVC()
    svc_wo_linear_kernel.fit(X_train, y_train)
    dummy = DummyClassifier()
    dummy.fit(X_train, y_train)
    return {'RF': rf, 'LR': lr, 'SVC_w_linear_kernel': svc_w_linear_kernel,
            'Dummy': dummy, 'SVC_wo_linear_kernel': svc_wo_linear_kernel}
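A minimal usage sketch follows; because it re-creates the split with the same random_state=12345, the hold-out set matches the one used inside trained_models() (the evaluation loop itself is an illustrative assumption):

models = trained_models()
X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12345)
for name, model in models.items():
    # score() reports mean accuracy on the identical hold-out split
    print(name, model.score(X_test, y_test))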
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_selection import GenericUnivariateSelect, RFECV, SelectFromModel

def get_feature_selection_model_from_name(type_of_estimator, model_name):
    # 'KeepAll' is a sentinel string handled by the caller, not an estimator.
    model_map = {
        'classifier': {
            'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        },
        'regressor': {
            'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        }
    }
    return model_map[type_of_estimator][model_name]
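A hedged usage sketch on synthetic data (the dataset below is an assumption, purely for illustration):

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
selector = get_feature_selection_model_from_name('classifier', 'RFECV')
# RFECV picks the feature count by cross-validation, then transforms X
X_reduced = selector.fit_transform(X, y)
print(X_reduced.shape)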
def get_feature_importance(self, clf, model_name):
    # Map each supported model to the attribute that exposes feature weights.
    clfs = {'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef'}
    if clfs[model_name] == 'feature_importances':
        return list(clf.feature_importances_)
    elif clfs[model_name] == 'coef':
        return clf.coef_.tolist()
    else:
        return None
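A sketch of how this method would be called; `pipeline` stands in for an instance of the surrounding class, and X_train/y_train are assumed data arrays:

rf = RandomForestClassifier(n_estimators=50).fit(X_train, y_train)
# returns one weight per input feature, as a plain list
importances = pipeline.get_feature_importance(rf, 'RandomForestClassifier')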
def test_improvement(self):
    np.random.seed(4)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    hyperopt = HyperoptOptimizer(model, [p1], clf_score)
    best_params, best_model = hyperopt.fit(X_train=data, y_train=target, n_iters=10)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
    for status in hyperopt.trials.statuses():
        self.assertEqual(status, 'ok')
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    # Note: RandomizedLogisticRegression / RandomizedLasso were removed from
    # scikit-learn (0.21+); this variant requires an older release.
    model_map = {
        'classifier': {
            'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'RandomizedSparse': RandomizedLogisticRegression(),
            'KeepAll': 'KeepAll'
        },
        'regressor': {
            'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'RandomizedSparse': RandomizedLasso(),
            'KeepAll': 'KeepAll'
        }
    }
    return model_map[type_of_estimator][model_name]
def get_classifier_class(class_name):
    name_table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
    }
    if class_name not in name_table:
        raise ValueError('No such classifier: {}'.format(class_name))
    return name_table[class_name]
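Usage sketch: resolve the name to a class, then instantiate and fit as usual (the training arrays are assumptions):

clf_cls = get_classifier_class('random_forest')
clf = clf_cls(n_estimators=100)  # any constructor kwargs of the resolved class
clf.fit(X_train, y_train)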
classify.py (project: oss-github-analysis-project, author: itu-oss-project-team)
def __create_classifiers(self):
    classifiers = list()
    # loss="log" selects logistic regression; newer scikit-learn (1.1+) spells it "log_loss".
    classifiers.append({"func": linear_model.SGDClassifier(loss="log"),
                        "name": "sgd"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(1, weights='distance'),
                        "name": "knn1"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(3, weights='distance'),
                        "name": "knn3"})
    classifiers.append({"func": neighbors.KNeighborsClassifier(5, weights='distance'),
                        "name": "knn5"})
    classifiers.append({"func": GaussianNB(),
                        "name": "naive_bayes"})
    # classifiers.append({"func": tree.DecisionTreeClassifier(), "name": "decision_tree"})
    # classifiers.append({"func": MLPClassifier(max_iter=10000), "name": "mlp"})
    # classifiers.append({"func": RandomForestClassifier(), "name": "random_forest"})
    return classifiers
def define_model(self, model, parameters, n_cores=0):
    # NB: n_cores is accepted but unused here; n_jobs is hard-coded per estimator.
    clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
            'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
            'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
            'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
            'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
            'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
            'GaussianNB': GaussianNB(),
            'DecisionTreeClassifier': DecisionTreeClassifier(),
            'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
            'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
            'linear.SVC': svm.LinearSVC()}
    if model not in clfs:
        raise ConfigError("Unsupported model {}".format(model))
    clf = clfs[model]
    clf.set_params(**parameters)
    return clf
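A hypothetical call, with `experiment` standing in for an instance of the surrounding class; the parameter dict is applied on top of the defaults via set_params():

clf = experiment.define_model('RandomForestClassifier',
                              {'n_estimators': 100, 'max_depth': 10})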
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)
    # sklearn.cross_validation was removed in scikit-learn 0.20; on modern
    # versions import train_test_split from sklearn.model_selection instead.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,
                                                                         y,
                                                                         test_size=0.25)
    #clf = neighbors.KNeighborsClassifier()
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:', confidence)
    predictions = clf.predict(X_test)
    print('predicted class counts:', Counter(predictions))
    print()
    return confidence
# examples of running:
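One hedged invocation (the ticker symbol is an assumption; extract_featuresets must be defined in the surrounding module):

confidence = do_ml('AAPL')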
def run_forests():
    print('random forest: \n')
    params = []
    scores = []
    for _ in range(5):
        max_features = np.random.randint(400, 800)
        # None is repeated so an unbounded depth is four times as likely.
        max_depth = np.random.choice([None, None, None, None, 30, 40, 60])
        forest = RandomForestClassifier(n_estimators=50,
                                        max_features=max_features,
                                        max_depth=max_depth)
        forest_fit = forest.fit(X_train, Y_train)
        pred = forest_fit.predict(X_test)
        print('\n params:', dict(max_features=max_features, max_depth=max_depth))
        print('forest train: ', zero_one_score(Y_train, forest_fit.predict(X_train)), ' test: ',
              zero_one_score(Y_test, pred))
        params.append((max_features, max_depth))
        scores.append(zero_one_score(Y_test, pred))
    # zero_one_score is an accuracy-style score, so the best run maximizes it.
    print('best:', params[np.argmax(scores)])
from sklearn.ensemble import RandomForestClassifier as RFC

def train_clf(x_train, y_train, best_depth):
    """Train classifier.

    Parameters
    ----------
    x_train : np.array [n_samples, n_features]
        Training features.
    y_train : np.array [n_samples]
        Training labels.
    best_depth : int
        Optimal max_depth parameter.

    Returns
    -------
    clf : classifier
        Trained scikit-learn classifier.
    """
    # class_weight='auto' was removed from scikit-learn; 'balanced' is the
    # modern equivalent.
    clf = RFC(n_estimators=100, max_depth=best_depth, n_jobs=-1,
              class_weight='balanced', max_features=None)
    clf = clf.fit(x_train, y_train)
    return clf
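A minimal sketch on random data, assuming the RFC alias imported above (shapes and labels are arbitrary):

import numpy as np

x = np.random.rand(100, 5)
y = np.random.randint(0, 2, 100)
clf = train_clf(x, y, best_depth=8)
print(clf.predict(x[:3]))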
def __init__(
        self, data_block, predictors=[], cv_folds=10,
        scoring_metric='accuracy', additional_display_metrics=[]):
    base_classification.__init__(
        self, alg=RandomForestClassifier(), data_block=data_block,
        predictors=predictors, cv_folds=cv_folds,
        scoring_metric=scoring_metric,
        additional_display_metrics=additional_display_metrics
    )
    self.model_output = pd.Series(self.default_parameters)
    self.model_output['Feature_Importance'] = "-"
    self.model_output['OOB_Score'] = "-"
    # Set parameters to default values:
    self.set_parameters(set_default=True)
def test_improvement(self):
    np.random.seed(4)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    grid_sizes = {'max_depth': 5}
    grid_search = GridSearchOptimizer(model, [p1], clf_score, grid_sizes)
    best_params, best_model = grid_search.fit(X_train=data, y_train=target)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
def test_objective_function(self):
    np.random.seed(4)
    data, target = make_classification(n_samples=100,
                                       n_features=10,
                                       n_informative=10,
                                       n_redundant=0,
                                       class_sep=100,
                                       n_clusters_per_class=1,
                                       flip_y=0.0)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    fun = partial(objective, model,
                  'sklearn',
                  clf_score,
                  data, target, data, target)
    # model should fit the data perfectly
    final_score = fun(model.get_params())[0]
    self.assertEqual(final_score, 1)
def test_expected_improvement_tractable(self):
    np.random.seed(5)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    bayesOpt = BayesianOptimizer(model, [p1], clf_score, method='expected_improvement')
    best_params, best_model = bayesOpt.fit(X_train=data, y_train=target, n_iters=10)
    self.assertTrue(bayesOpt.success)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
def test_probability_of_improvement_tractable(self):
    np.random.seed(5)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    bayesOpt = BayesianOptimizer(model, [p1], clf_score, method='probability_of_improvement')
    best_params, best_model = bayesOpt.fit(X_train=data, y_train=target, n_iters=10)
    self.assertTrue(bayesOpt.success)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
def test_upper_confidence_bound_tractable(self):
    np.random.seed(5)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    bayesOpt = BayesianOptimizer(model, [p1], clf_score, method='upper_confidence_bound')
    best_params, best_model = bayesOpt.fit(X_train=data, y_train=target, n_iters=10)
    self.assertTrue(bayesOpt.success)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
def test_improvement(self):
    np.random.seed(4)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    rand_search = RandomSearchOptimizer(model, [p1], clf_score)
    best_params, best_model = rand_search.fit(X_train=data, y_train=target, n_iters=10)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
def __init__(self, task: Task, scorer: Scorer, opt_logger: OptimizationLogger = VoidLogger(None)):
    if task.task == "classification":
        space = RandomForestOptimizer.Params.classification_space
        model = ensemble.RandomForestClassifier()
    else:
        space = RandomForestOptimizer.Params.regression_space
        model = ensemble.RandomForestRegressor()
    super().__init__(model, task, space, scorer, opt_logger)
def learns(tests, trains, indep=lambda x: x[:-1],
           dep=lambda x: x[-1],
           rf=Abcd(),
           lg=Abcd(),
           dt=Abcd(),
           nb=Abcd()):
    # NB: the Abcd() defaults are evaluated once at definition time, so repeated
    # calls without explicit arguments accumulate into the same scorer objects.
    x1, y1, x2, y2 = trainTest(tests, trains, indep, dep)
    forest = RandomForestClassifier(n_estimators=50)
    forest = forest.fit(x1, y1)
    for n, got in enumerate(forest.predict(x2)):
        rf(predicted=got, actual=y2[n])
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(x1, y1)
    for n, got in enumerate(logreg.predict(x2)):
        lg(predicted=got, actual=y2[n])
    bayes = GaussianNB()
    bayes.fit(x1, y1)
    for n, got in enumerate(bayes.predict(x2)):
        nb(predicted=got, actual=y2[n])
    dectree = DecisionTreeClassifier(criterion="entropy",
                                     random_state=1)
    dectree.fit(x1, y1)
    for n, got in enumerate(dectree.predict(x2)):
        dt(predicted=got, actual=y2[n])
def rforest(train, test, tunings=None, smoteit=True, duplicate=True):
    "Random forest classifier: predict which modules are buggy."
    if smoteit:
        # Rebalance the training data before fitting.
        train = SMOTE(train, atleast=50, atmost=101, resample=duplicate)
    if not tunings:
        clf = RandomForestClassifier(n_estimators=100, random_state=1)
    else:
        clf = RandomForestClassifier(n_estimators=int(tunings[0]),
                                     max_features=tunings[1] / 100,
                                     min_samples_leaf=int(tunings[2]),
                                     min_samples_split=int(tunings[3]))
    train_DF = formatData(train)
    test_DF = formatData(test)
    features = train_DF.columns[:-2]
    klass = train_DF[train_DF.columns[-2]]
    clf.fit(train_DF[features], klass)
    preds = clf.predict(test_DF[test_DF.columns[:-2]])
    return preds
def __init__(self,
             estimator=RandomForestClassifier(n_estimators=50,
                                              n_jobs=-1,
                                              max_features=1.,
                                              min_samples_leaf=5,
                                              max_depth=5),
             n_folds=2,
             stratify=True,
             random_state=1):
    self.estimator = estimator
    self.n_folds = n_folds
    self.stratify = stratify
    self.random_state = random_state
    self.__cv = None
    self.__pred = None
    self.__target = None
    self.__fitOK = False
def test_stacked_classfier_extkfold(self):
    bclf = LogisticRegression(random_state=1)
    clfs = [RandomForestClassifier(n_estimators=40, criterion='gini', random_state=1),
            RidgeClassifier(random_state=1),
            ]
    # StratifiedKFold(labels, n_folds) is the pre-0.18 scikit-learn signature.
    sl = StackedClassifier(bclf,
                           clfs,
                           n_folds=3,
                           verbose=0,
                           Kfold=StratifiedKFold(self.iris.target, 3),
                           stack_by_proba=False,
                           oob_score_flag=True,
                           oob_metrics=log_loss)
    sl.fit(self.iris.data, self.iris.target)
    score = sl.score(self.iris.data, self.iris.target)
    self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
def test_fwls_classfier(self):
    feature_func = lambda x: np.ones(x.shape)
    bclf = LogisticRegression(random_state=1)
    clfs = [RandomForestClassifier(n_estimators=40, criterion='gini', random_state=1),
            RidgeClassifier(random_state=1),
            ]
    sl = FWLSClassifier(bclf,
                        clfs,
                        feature_func=feature_func,
                        n_folds=3,
                        verbose=0,
                        Kfold=StratifiedKFold(self.iris.target, 3),
                        stack_by_proba=False)
    sl.fit(self.iris.data, self.iris.target)
    score = sl.score(self.iris.data, self.iris.target)
    self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
def test_classifier(self):
    index = [i for i in range(len(self.iris.data))]
    rf = RandomForestClassifier()
    jrf = JoblibedClassifier(rf, "rf", cache_dir='')
    jrf.fit(self.iris.data, self.iris.target, index)
    prediction = jrf.predict(self.iris.data, index)
    score = accuracy_score(self.iris.target, prediction)
    self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
    rf = RandomForestClassifier(n_estimators=20)
    jrf = JoblibedClassifier(rf, "rf", cache_dir='')
    jrf.fit(self.iris.data, self.iris.target)
    index = [i for i in range(len(self.iris.data))]
    prediction2 = jrf.predict(self.iris.data, index)
    self.assertTrue((prediction == prediction2).all())
def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    Random forest baseline: fit on flattened inputs and report test accuracy.
    """
    from sklearn.ensemble import RandomForestClassifier
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec * 100.0))
    return clf, y_pred
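A usage sketch on the digits dataset (the dataset choice is an assumption; issparse, LOGGER, and np must already be available in the surrounding module):

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf, y_pred = prec_rf(500, X_tr, y_tr, X_te, y_te)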