def get_feature_importance(self, clf, model_name):
    # Map each supported model to the attribute that exposes its importances.
    clfs = {'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef'}
    if clfs[model_name] == 'feature_importances':
        return list(clf.feature_importances_)
    elif clfs[model_name] == 'coef':
        # coef_ is 2-D for linear models; return it as nested lists.
        return clf.coef_.tolist()
    else:
        return None
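A quick hedged check of the helper above (toy data; passing None for the unused self parameter is only for illustration):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

X = np.random.rand(100, 5)
y = np.random.randint(0, 2, 100)
forest = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)
# Tree ensembles resolve to the feature_importances_ attribute:
print(get_feature_importance(None, forest, 'RandomForestClassifier'))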
Python ExtraTreesClassifier() example source code
def __init__(
        self, data_block, predictors=[], cv_folds=10,
        scoring_metric='accuracy', additional_display_metrics=[]):
    base_classification.__init__(
        self, alg=ExtraTreesClassifier(), data_block=data_block,
        predictors=predictors, cv_folds=cv_folds,
        scoring_metric=scoring_metric,
        additional_display_metrics=additional_display_metrics)
    self.model_output = pd.Series(self.default_parameters)
    self.model_output['Feature_Importance'] = "-"
    self.model_output['OOB_Score'] = "-"
    # Set parameters to default values:
    self.set_parameters(set_default=True)
def define_model(self, model, parameters, n_cores=0):
    clfs = {
        'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
        'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
        'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
        'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'GaussianNB': GaussianNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
        'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
        'linear.SVC': svm.LinearSVC()}
    if model not in clfs:
        raise ConfigError("Unsupported model {}".format(model))
    clf = clfs[model]
    clf.set_params(**parameters)
    return clf
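The factory only instantiates a preset estimator and then overrides it with set_params. A standalone sketch of the ExtraTreesClassifier branch (the override values are illustrative, not the project's defaults):

from sklearn.ensemble import ExtraTreesClassifier

clf = ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy')  # preset from the table above
clf.set_params(n_estimators=200, max_depth=8)  # what define_model(...) does with the parameters argument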
Source file: extra_trees_preproc_for_classification.py
Project: AutoML-Challenge
Author: postech-mlg-exbrain
def fit(self, X, Y, sample_weight=None):
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel
    num_features = X.shape[1]
    max_features = int(
        float(self.max_features) * (np.log(num_features) + 1))
    # Use at most half of the features
    max_features = max(1, min(int(X.shape[1] / 2), max_features))
    preprocessor = ExtraTreesClassifier(
        n_estimators=self.n_estimators, criterion=self.criterion,
        max_depth=self.max_depth, min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf, bootstrap=self.bootstrap,
        max_features=max_features, max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score, n_jobs=self.n_jobs, verbose=self.verbose,
        random_state=self.random_state, class_weight=self.class_weight
    )
    preprocessor.fit(X, Y, sample_weight=sample_weight)
    self.preprocessor = SelectFromModel(preprocessor, prefit=True)
    return self
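A minimal sketch of how the fitted preprocessor is typically used afterwards (toy data; the hyperparameters are illustrative assumptions, not the class defaults above):

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

X = np.random.rand(200, 40)
Y = np.random.randint(0, 2, 200)
forest = ExtraTreesClassifier(n_estimators=100, random_state=0).fit(X, Y)
selector = SelectFromModel(forest, prefit=True)
X_reduced = selector.transform(X)  # keeps only features above the mean importance (the default threshold)
print(X_reduced.shape)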
def prec_ets(n_trees, X_train, y_train, X_test, y_test, random_state=None):
    """
    ExtraTrees
    """
    from sklearn.ensemble import ExtraTreesClassifier
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = ExtraTreesClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1, random_state=random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Fraction of correct predictions (accuracy), despite the "prec" name.
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_ets{}={:.6f}%'.format(n_trees, prec * 100.0))
    return clf, y_pred
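A hedged example call (shapes and labels are made up; LOGGER and issparse must be in scope, as in the original module):

import logging
import numpy as np
from scipy.sparse import issparse

logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger(__name__)

X_train = np.random.rand(100, 8, 8)   # trailing dimensions get flattened by prec_ets
y_train = np.random.randint(0, 3, 100)
X_test = np.random.rand(30, 8, 8)
y_test = np.random.randint(0, 3, 30)
clf, y_pred = prec_ets(50, X_train, y_train, X_test, y_test, random_state=0)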
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    # Label anomalous samples 1 and normal samples 0, then use tree-based
    # feature importances to pick out the features that separate the two.
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data.extend(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    for i in range(0, len(name)):
        if outcome[i]:
            print(name[i])
Source file: onlinedetectWithlittleData.py
Project: onlineDetectForHadoop
Author: DawnsonLi
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    # DataFrame.append returns a new frame (deprecated in recent pandas; pd.concat is the modern equivalent).
    data = data.append(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    for i in range(0, len(name)):
        if outcome[i]:
            print(name[i])
Source file: birchForChangeWindowSize.py
Project: onlineDetectForHadoop
Author: DawnsonLi
def analyseReasonWithTreeBaesd(anamolySample, normalSample):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = data.append(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    # Feature names are taken from the DataFrame columns.
    name = []
    for i in data.columns:
        name.append(i)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    for i in range(0, len(name)):
        if outcome[i]:
            print(name[i])
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data.extend(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    # Collect the names of the selected features into a warning string.
    warnstr = ""
    for i in range(0, len(name)):
        if outcome[i]:
            warnstr += name[i]
            warnstr += " ; "
    return warnstr
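As a rough illustration of the variant above that returns a warning string (the feature values and names are made up):

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

anomalies = [[0.95, 0.90, 0.10], [0.97, 0.88, 0.12]]   # hypothetical [cpu, memory, network] usage
normal = [[0.20, 0.30, 0.15], [0.25, 0.28, 0.11], [0.22, 0.31, 0.14]]
print(analyseReasonWithTreeBaesd(anomalies, normal, ["cpu", "memory", "network"]))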
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = pd.concat([anamolySample, normalSample])
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    warnstr = ""
    for i in range(0, len(name)):
        if outcome[i]:
            warnstr += name[i]
            warnstr += " ; "
    return warnstr
def analyseReasonWithTreeBaesd(anamolySample, normalSample, name):
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = pd.concat([anamolySample, normalSample])
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = ExtraTreesClassifier()
    clf = clf.fit(data, target)
    model = SelectFromModel(clf, prefit=True)
    outcome = model.get_support()
    warnstr = ""
    for i in range(0, len(name)):
        if outcome[i]:
            warnstr += name[i]
            warnstr += " ; "
    print(warnstr)
    return warnstr
def runET(train_X, train_y, test_X, test_y=None, validation=1, n_est_val=50, depth_val=None, split_val=2, leaf_val=1, feat_val='auto', jobs_val=4, random_state_val=0):
    clf = ensemble.ExtraTreesClassifier(
        n_estimators=n_est_val,
        max_depth=depth_val,
        min_samples_split=split_val,
        min_samples_leaf=leaf_val,
        max_features=feat_val,
        criterion='entropy',
        n_jobs=jobs_val,
        random_state=random_state_val)
    clf.fit(train_X, train_y)
    # Probability of the positive class for train and test sets.
    pred_train_y = clf.predict_proba(train_X)[:, 1]
    pred_test_y = clf.predict_proba(test_X)[:, 1]
    if validation:
        train_loss = log_loss(train_y, pred_train_y)
        loss = log_loss(test_y, pred_test_y)
        print("Train, Test loss : ", train_loss, loss)
        return pred_test_y, loss
    else:
        return pred_test_y
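An illustrative call on a synthetic binary split (everything below is made up; the function expects `from sklearn import ensemble` and `from sklearn.metrics import log_loss` in its module):

import numpy as np
from sklearn import ensemble
from sklearn.metrics import log_loss

X = np.random.rand(500, 20)
y = (X[:, 0] + np.random.rand(500) > 1.0).astype(int)
# feat_val='sqrt' avoids the legacy 'auto' default, which newer scikit-learn versions reject
pred, val_loss = runET(X[:400], y[:400], X[400:], y[400:], validation=1, n_est_val=100, feat_val='sqrt')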
def extratreescv(n_estimators,
                 min_samples_split,
                 min_samples_leaf,
                 max_features,
                 max_depth,
                 min_weight_fraction_leaf):
    # x0, y0 (train) and x1, y1 (validation) are module-level splits in the original file.
    clf = ExtraTreesClassifier(n_estimators=int(n_estimators),
                               min_samples_split=int(min_samples_split),
                               min_samples_leaf=int(min_samples_leaf),
                               max_features=int(max_features),
                               max_depth=int(max_depth),
                               min_weight_fraction_leaf=min_weight_fraction_leaf,
                               n_jobs=-1,
                               random_state=1234,
                               verbose=1)
    clf.fit(x0, y0)
    # Negative log loss on the held-out split, so that larger is better for a maximizing optimizer.
    ll = -log_loss(y1, clf.predict_proba(x1)[:, 1])
    return ll
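This objective returns a negative log loss and casts its float arguments to int, the usual shape for a Bayesian-optimisation target. A sketch of driving it with the bayes_opt package, assuming x0/y0 and x1/y1 already exist and that the bounds below are illustrative:

from bayes_opt import BayesianOptimization  # assumption: the bayes_opt package is installed

bo = BayesianOptimization(
    f=extratreescv,
    pbounds={'n_estimators': (100, 500),
             'min_samples_split': (2, 20),
             'min_samples_leaf': (1, 10),
             'max_features': (1, 20),
             'max_depth': (3, 20),
             'min_weight_fraction_leaf': (0.0, 0.5)},
    random_state=1234)
bo.maximize(init_points=5, n_iter=25)
print(bo.max)   # best negative log loss and the parameters that produced it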
def try_params(n_iterations, params):
    # XT is presumably an alias for ExtraTreesClassifier; trees_per_iteration, pprint,
    # data and train_and_eval_sklearn_classifier come from the surrounding module.
    n_estimators = int(round(n_iterations * trees_per_iteration))
    print("n_estimators:", n_estimators)
    pprint(params)
    clf = XT(n_estimators=n_estimators, verbose=0, n_jobs=-1, **params)
    return train_and_eval_sklearn_classifier(clf, data)
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Compare multiple classifiers and display the best one."""
    utils.print_success("Comparison of different classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors": KNeighborsClassifier(3),
        # "GaussianProcess": GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree": DecisionTreeClassifier(max_depth=5),
        # "MLP": MLPClassifier(),
        # "AdaBoost": AdaBoostClassifier(),
        # "GaussianNB": GaussianNB(),
        # "QDA": QuadraticDiscriminantAnalysis(),
        # "SVM": SVC(kernel="linear", C=0.025),
        # "GradientBoosting": GradientBoostingClassifier(),
        # "ExtraTrees": ExtraTreesClassifier(),
        # "LogisticRegression": LogisticRegression(),
        # "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
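A hedged sketch of the `data` dictionary this function expects (toy values; the utils helpers from the original project must be importable for the call to run):

data = {
    "train_features": [[0.1, 0.2], [0.9, 0.8], [0.2, 0.1]],
    "train_groundtruths": ["classical", "rock", "classical"],
    "test_features": [[0.15, 0.22]],
    "test_groundtruths": ["classical"],
}
predictions = classify(data=data)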
Source file: trainer.py
Project: Python-Machine-Learning-Cookbook
Author: PacktPublishing
def __init__(self, X, label_words):
    self.le = preprocessing.LabelEncoder()
    self.clf = ExtraTreesClassifier(n_estimators=100,
                                    max_depth=16, random_state=0)
    y = self.encode_labels(label_words)
    self.clf.fit(np.asarray(X), y)
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesClassifier as ETC
    if refit:
        self.estimator = None
    if self.estimator is None:
        num_features = X.shape[1]
        max_features = int(
            float(self.max_features) * (np.log(num_features) + 1))
        # Use at most half of the features
        max_features = max(1, min(int(X.shape[1] / 2), max_features))
        self.estimator = ETC(
            n_estimators=0, criterion=self.criterion,
            max_depth=self.max_depth, min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf, bootstrap=self.bootstrap,
            max_features=max_features, max_leaf_nodes=self.max_leaf_nodes,
            oob_score=self.oob_score, n_jobs=self.n_jobs, verbose=self.verbose,
            random_state=self.random_state,
            class_weight=self.class_weight,
            warm_start=True
        )
    tmp = self.estimator  # TODO copy ?
    # Grow the warm-started forest by n_iter additional trees.
    tmp.n_estimators += n_iter
    tmp.fit(X, y, sample_weight=sample_weight)
    self.estimator = tmp
    return self
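The warm_start trick above can be reproduced standalone; a minimal sketch with toy data and illustrative numbers:

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier

X = np.random.rand(300, 10)
y = np.random.randint(0, 2, 300)
clf = ExtraTreesClassifier(n_estimators=0, warm_start=True, random_state=0)
for _ in range(5):
    clf.n_estimators += 20   # grow the forest by 20 trees without retraining existing ones
    clf.fit(X, y)
print(len(clf.estimators_))  # 100 trees after five incremental fits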
def __init__(self, name, kwargs):
    from sklearn.ensemble import ExtraTreesClassifier
    super(GCExtraTreesClassifier, self).__init__(name, ExtraTreesClassifier, kwargs)
Source file: model.py
Project: 5th_place_solution_facebook_check_ins
Author: aikinogard
def et_opt1(df_cell_train_feats, y_train, df_cell_test_feats):
    logging.info("train et_opt1 model")
    clf = ExtraTreesClassifier(n_estimators=500, n_jobs=-1, max_features="log2", min_samples_split=5, min_samples_leaf=1)
    clf.fit(df_cell_train_feats, y_train)
    y_test_pred = clf.predict_proba(df_cell_test_feats)
    return y_test_pred
def get_data_preprocessor_balancing(params, y):
    d_balancing = params['layer_dict_list'][1]
    if params['balancing'] == str(d_balancing['None']) or params['balancing'] == 'None':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = None
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        params['sample_weight'] = None
    elif params['balancing'] == str(d_balancing['weighting']) or params['balancing'] == 'weighting':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = 'auto'
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        if len(y.shape) > 1:
            # Collapse multilabel targets into a single integer code per sample.
            offsets = [2 ** i for i in range(y.shape[1])]
            y_ = np.sum(y * offsets, axis=1)
        else:
            y_ = y
        unique, counts = np.unique(y_, return_counts=True)
        # Inverse-frequency weights, normalised to mean 1.
        cw = 1. / counts
        cw = cw / np.mean(cw)
        sample_weight = np.ones(y_.shape)
        for i, ue in enumerate(unique):
            mask = y_ == ue
            sample_weight[mask] *= cw[i]
        params['sample_weight'] = sample_weight
    return params
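A toy walk-through of the inverse-frequency weighting branch above (all numbers are illustrative):

import numpy as np

y_ = np.array([0, 0, 0, 0, 1, 1])                      # imbalanced labels
unique, counts = np.unique(y_, return_counts=True)     # counts = [4, 2]
cw = (1. / counts) / np.mean(1. / counts)              # [0.667, 1.333], normalised to mean 1
sample_weight = np.ones(y_.shape)
for i, ue in enumerate(unique):
    sample_weight[y_ == ue] *= cw[i]
print(sample_weight)   # minority-class samples get twice the weight of majority ones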
def __init__(self):
    SingleClassifier.SingleClassifier.__init__(self)
    # weak classifier
    self.clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, min_samples_split=2, random_state=0)
def makEnsemble(X, xlist, Y):
    # naive Bayes
    clf = MultinomialNB()
    clf.fit(xlist, Y)
    featureSelectModel.append(clf)
    # K nearest neighbours
    clf = KNeighborsClassifier()
    clf.fit(xlist, Y)
    featureSelectModel.append(clf)
    # logistic regression
    clf = LogisticRegression(C=1)
    clf.fit(xlist, Y)
    featureSelectModel.append(clf)
    # random forest
    clf = RandomForestClassifier(n_estimators=400)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)
    # extra trees
    clf = ExtraTreesClassifier(n_estimators=400)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)
    # decision tree
    clf = DecisionTreeClassifier(max_depth=None, min_samples_split=1, random_state=0)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)
    # gradient boosting
    params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
              'learning_rate': 0.01}
    clf = GradientBoostingClassifier(**params)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)
def clf_extra_trees(data, random_state, calibrated=False, ext_name=""):
    """
    Application of the extra trees classifier. For details, see the
    'clf_sklearn' function.
    """
    et = ExtraTreesClassifier(n_estimators=500, n_jobs=-1,
                              max_depth=17,
                              max_features=0.2,
                              min_samples_split=80,
                              random_state=random_state, verbose=10)
    return clf_sklearn(et, data, random_state, calibrated, clf_name='ET',
                       ext_name=ext_name)