Example source code for the Python class AdaBoostClassifier()

models.py (project: johnson-county-ddj-public, author: dssg)
def get_feature_importance(self,clf, model_name ):
        clfs = {'RandomForestClassifier':'feature_importances',
                'ExtraTreesClassifier': 'feature_importances',
                'AdaBoostClassifier': 'feature_importances',
                'LogisticRegression': 'coef',
                'svm.SVC': 'coef',
                'GradientBoostingClassifier': 'feature_importances',
                'GaussianNB': None,
                'DecisionTreeClassifier': 'feature_importances',
                'SGDClassifier': 'coef',
                'KNeighborsClassifier': None,
                'linear.SVC': 'coef'}

        if clfs[model_name] == 'feature_importances':
            return list(clf.feature_importances_)
        elif clfs[model_name] == 'coef':
            return clf.coef_.tolist()
        else:
            return None
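
A minimal usage sketch for the lookup above; the dataset and the fitted model here are hypothetical, not part of the original project:

from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier

X, y = load_iris(return_X_y=True)
clf = AdaBoostClassifier(n_estimators=50).fit(X, y)
# 'AdaBoostClassifier' maps to 'feature_importances' in the table above,
# so get_feature_importance(clf, 'AdaBoostClassifier') would return:
importances = list(clf.feature_importances_)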
classifier.py (project: rltk, author: usc-isi-i2)
def get_classifier_class(class_name):
    name_table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
    }

    if class_name not in name_table:
        raise ValueError('No such classifier')

    return name_table[class_name]
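
A short sketch of how this factory might be called; the parameter value is illustrative only:

cls = get_classifier_class('ada_boost')  # returns the class itself, not an instance
clf = cls(n_estimators=100)              # instantiate with caller-chosen parameters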
models.py (project: johnson-county-ddj-public, author: dssg)
def define_model(self, model, parameters, n_cores = 0):
        clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
                'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
                'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
                'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
                'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
                'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
                'GaussianNB': GaussianNB(),
                'DecisionTreeClassifier': DecisionTreeClassifier(),
                'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
                'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3), 
                'linear.SVC': svm.LinearSVC() }

        if model not in clfs:
            raise ConfigError("Unsupported model {}".format(model))

        clf = clfs[model]
        clf.set_params(**parameters)
        return clf
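
A standalone illustration of the default-plus-override pattern used by define_model, with hypothetical override values:

from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier(n_estimators=200)           # a registry default
clf.set_params(n_estimators=100, learning_rate=0.5)  # caller-supplied overrides, as in define_model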
ClassificationAdaBoost.py (project: AirTicketPredicting, author: junlulocky)
def __init__(self, isTrain, isOutlierRemoval):
        super(ClassificationAdaBoost, self).__init__(isTrain, isOutlierRemoval)
        # data preprocessing
        self.dataPreprocessing()

        self.dt_stump = DecisionTreeClassifier(max_depth=10)
        self.ada = AdaBoostClassifier(
            base_estimator=self.dt_stump,
            learning_rate=1,
            n_estimators=7,
            algorithm="SAMME.R")
        # self.dt_stump = DecisionTreeClassifier(max_depth=14)
        # self.ada = AdaBoostClassifier(
        #     base_estimator=self.dt_stump,
        #     learning_rate=1,
        #     n_estimators=50,
        #     algorithm="SAMME")
models_classification.py (project: easyML, author: aarshayj)
def __init__(
        self,data_block, predictors=[],cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=[]):

        base_classification.__init__(
            self, alg=AdaBoostClassifier(), data_block=data_block, 
            predictors=predictors,cv_folds=cv_folds,
            scoring_metric=scoring_metric, 
            additional_display_metrics=additional_display_metrics
            )

        self.model_output = pd.Series(self.default_parameters)
        self.model_output['Feature_Importance'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)
SentiCR.py (project: SentiCR, author: senticr)
def get_classifier(self):
        algo=self.algo

        if algo=="GBT":
            return GradientBoostingClassifier()
        elif algo=="RF":
            return  RandomForestClassifier()
        elif algo=="ADB":
            return AdaBoostClassifier()
        elif algo =="DT":
            return  DecisionTreeClassifier()
        elif algo=="NB":
            return  BernoulliNB()
        elif algo=="SGD":
            return  SGDClassifier()
        elif algo=="SVC":
            return LinearSVC()
        elif algo=="MLPC":
            return MLPClassifier(activation='logistic',  batch_size='auto',
            early_stopping=True, hidden_layer_sizes=(100,), learning_rate='adaptive',
            learning_rate_init=0.1, max_iter=5000, random_state=1,
            solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
            warm_start=False)
        return 0
with_csv.py (project: stock_trend_prediction, author: r12543)
def performAdaBoostClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Ada Boosting binary Classification
    """
    # n = parameters[0]
    # l =  parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)

    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)    

    accuracy = clf.score(X_test, y_test)

    print "AdaBoost: ", accuracy
sp_start.py (project: stock_trend_prediction, author: r12543)
def performAdaBoostClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Ada Boosting binary Classification
    """
    # n = parameters[0]
    # l =  parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)

    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)

    accuracy = clf.score(X_test, y_test)

    print "AdaBoost: ", accuracy
classifier.py (project: stock-price-prediction, author: chinuy)
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf

    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)

    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()

    elif method == 'SVM':
        c = parameters[0]
        g =  parameters[1]
        clf = SVC(C=c, gamma=g)

    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown'])
ada_boosting.py (project: DataMiningCompetitionFirstPrize, author: lzddzh)
def learn(x, y, test_x):
    # set sample weight
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_ada)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_ada)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_ada)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_ada)

    clf = AdaBoostClassifier(n_estimators=variables.n_estimators_ada,
                             learning_rate=variables.learning_rate_ada)
    clf.fit(x, y, np.asarray(weight_list))
    prediction_list = clf.predict(test_x)
    prediction_list_prob = clf.predict_proba(test_x)

    return prediction_list, prediction_list_prob
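
A self-contained sketch of the same per-class weighting idea; variables.* above comes from the project's configuration module, so the weights and data here are made up:

import numpy as np
from sklearn.ensemble import AdaBoostClassifier

rng = np.random.RandomState(0)
x = rng.rand(100, 4)
y = rng.choice(["0", "1000", "1500", "2000"], size=100)

class_weights = {"0": 1.0, "1000": 2.0, "1500": 2.0, "2000": 3.0}  # hypothetical values
sample_weight = np.asarray([class_weights[label] for label in y])

clf = AdaBoostClassifier(n_estimators=100, learning_rate=0.5)
clf.fit(x, y, sample_weight=sample_weight)
prediction_list = clf.predict(x)
prediction_list_prob = clf.predict_proba(x)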
learning_rfc.py (project: SyConn, author: StructuralNeurobiologyLab)
def init_clf(clf_used, params=None):
    if params is not None:
        params_used = params
    elif clf_used == 'svm':
        params_used = svm_params
    elif clf_used == 'ada_boost':
        params_used = rf_params
    elif clf_used == 'lr':
        params_used = lr_params
    else:
        params_used = rf_params
    if clf_used == 'svm':
        clf = SVC(**params_used)
    elif clf_used == 'ada_boost':
        rf = RandomForestClassifier(**rf_params)
        clf = AdaBoostClassifier(base_estimator=rf, **params_used)
    elif clf_used == 'lr':
        clf = LogisticRegressionCV(**params_used)
    else:
        clf = RandomForestClassifier(**params_used)
    return clf
classify.py (project: Stock-Market-Analysis-and-Prediction, author: samshara)
def performAdaBoostClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
    """
    Ada Boosting binary Classification
    """
    # n = parameters[0]
    # l =  parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)

    if savemodel:
        #fname_out = '{}-{}.pickle'.format(fout, datetime.now())
        fname_out = fout + '.pickle'
        with open(fname_out, 'wb') as f:
            pickle.dump(clf, f, -1)    

    accuracy = clf.score(X_test, y_test)

    return accuracy
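
A counterpart sketch for reading the model back; the helper and file name are hypothetical, mirroring the save above:

import pickle

def load_adaboost_model(fname):
    # fname should match the fout + '.pickle' path written by performAdaBoostClass
    with open(fname, 'rb') as f:
        return pickle.load(f)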
10.1 Adaboost classifer.py (project: ML-note, author: JasonK93)
def test_AdaBoostClassifier(*data):
    '''
    test AdaBoost score with different numbers of base estimators
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train,y_train)
    ## graph
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostClassifier")
    plt.show()
10.1 Adaboost classifer.py (project: ML-note, author: JasonK93)
def test_AdaBoostClassifier_learning_rate(*data):
    '''
    test performance with different learning rates
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    learning_rates=np.linspace(0.01,1)  # 50 evenly spaced values by default
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    training_scores=[]
    testing_scores=[]
    for learning_rate in learning_rates:
        clf=ensemble.AdaBoostClassifier(learning_rate=learning_rate,n_estimators=500)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(learning_rates,training_scores,label="Training score")
    ax.plot(learning_rates,testing_scores,label="Testing score")
    ax.set_xlabel("learning rate")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostClassifier")
    plt.show()
test_weight_boosting.py (project: Parallel-SGD, author: angadgill)
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
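
After fitting, the search object exposes the winning configuration; a minimal self-contained sketch on iris (keeping the base_estimator spelling used above, which newer scikit-learn renames to estimator):

from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
search = GridSearchCV(boost, {'n_estimators': (1, 2),
                              'base_estimator__max_depth': (1, 2)})
search.fit(iris.data, iris.target)
print(search.best_params_)         # winning hyperparameters
best_clf = search.best_estimator_  # refit on the full data by default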
identify_singing_voice_gender.py (project: ISM2017, author: ybayle)
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
common.py (project: SIDR, author: damurdock)
def constructModel(corpus, classList, features, modelOutput):
    """
    Trains a bagging classifier on the test corpus.

    Args:
        corpus: A list of lists, containing the GC content, coverage, and class number.
        classList: A list of class names.
        features: List of variables used by each contig.
        modelOutput: Location to save model as GraphViz DOT, or False to save no model.
    Returns:
        classifier: A BaggingClassifier object that has been trained on the test corpus.
    """
    corpus.sort()  # just in case
    X = []
    Y = []
    for item in corpus:
        X.append(item[:-1]) # all but the last item
        Y.append(item[-1]) # only the last item
    X_train, X_test, Y_train, Y_test = mscv.train_test_split(X, Y, test_size=0.3, random_state=0)
    # TODO: implement classifier testing and comparison, now only baggingClassifier is used as per paper
    #treeClassifier = tree.DecisionTreeClassifier()
    #treeClassifier = treeClassifier.fit(X_train, Y_train)
    #click.echo("Decision tree classifier built, score is %s out of 1.00" % treeClassifier.score(X_test, Y_test))
    baggingClassifier = ensemble.BaggingClassifier()
    baggingClassifier = baggingClassifier.fit(X_train, Y_train)
    click.echo("Bagging classifier built, score is %s out of 1.00" % baggingClassifier.score(X_test, Y_test))
    #forestClassifier = ensemble.RandomForestClassifier(n_estimators=10)
    #forestClassifier = forestClassifier.fit(X_train, Y_train)
    #click.echo("Random forest classifier built, score is %s out of 1.00" % forestClassifier.score(X_test, Y_test))
    #adaClassifier = ensemble.AdaBoostClassifier(n_estimators=100)
    #adaClassifier = adaClassifier.fit(X_train, Y_train)
    #click.echo("AdaBoost classifier built, score is %s out of 1.00" % adaClassifier.score(X_test, Y_test))
    #gradientClassifier = ensemble.GradientBoostingClassifier(n_estimators=100)
    #gradientClassifier = gradientClassifier.fit(X_train, Y_train)
    #click.echo("Gradient tree boosting classifier built, score is %s out of 1.00" % gradientClassifier.score(X_test, Y_test))
    if modelOutput:
        with open(modelOutput, 'w') as dotfile:
            # export_graphviz expects a single decision tree, so export the
            # first tree of the fitted bagging ensemble
            tree.export_graphviz(baggingClassifier.estimators_[0], out_file=dotfile, feature_names=features,
                                 class_names=classList, filled=True, rounded=True, special_characters=True)
    return baggingClassifier
Prediction.py (project: XTREE, author: ai-se)
def adaboost(train, test, smoteit=True):
  "ADABOOST"
  if smoteit:
    train = SMOTE(train)
  clf = AdaBoostClassifier()
  train_DF = formatData(train)
  test_DF = formatData(test)
  features = train_DF.columns[:-2]
  klass = train_DF[train_DF.columns[-2]]
  # set_trace()
  clf.fit(train_DF[features], klass)
  preds = clf.predict(test_DF[test_DF.columns[:-2]]).tolist()
  return preds
dmonscilearnclassification.py (project: dmon-adp, author: igabriel85)
def adaBoost(self, settings, data=None, dropna=True):
        df = self.__loadData(data, dropna)
        features = df.columns[:-1]
        X = df[features]
        y = df.iloc[:, -1].values
        seed = 7
        num_trees = 500
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)  # random_state requires shuffle=True
        print(kfold)
        model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)
        results = model_selection.cross_val_score(model, X, y, cv=kfold)
        model.fit(X, y)
        print(results.mean())
        print(model.score(X, y))
        return True
service.py (project: intellead-classification, author: intellead)
def classification(lead):
    #classifiers = [
    #    ('ab', AdaBoostClassifier()),
    #    ('dt', DecisionTreeClassifier(max_depth=5)),
    #    ('kn', KNeighborsClassifier(16)),
    #]
    inputs = get_dataset_input_from_database(lead.keys())
    outputs = get_dataset_output_from_database()
    print('The total number of examples in the dataset is: %d' % (len(inputs)))
    inputs_training, inputs_test, outputs_training, outputs_test = train_test_split(inputs, outputs, test_size=0.3, random_state=42)
    print('The number of examples used for training are: %d' % (len(inputs_training)))
    print('The number of examples used for testing are: %d' % (len(inputs_test)))
    knn = KNeighborsClassifier(n_neighbors=7, p=2)
    knn.fit(inputs_training, np.ravel(outputs_training))
    print('[K=7] The probability of the algorithm to be right is: %f%%' % (knn.score(inputs_test, outputs_test) * 100))
    #voting_classifier = VotingClassifier(estimators=classifiers, voting='hard')
    #voting_classifier = voting_classifier.fit(inputs_training, np.ravel(outputs_training))
    #print('The probability of the machine to be right is: %f%%' % (voting_classifier.score(inputs_test, outputs_test) * 100))
    print('Lead data:')
    print(lead)
    data_to_predict = convert_dict_to_tuple(lead)
    print('Lead data to predict:')
    print(data_to_predict)
    lead_status = knn.predict([data_to_predict])  # a single sample must be wrapped to form a 2-D input
    lead_status_value = lead_status[0]
    #lead_status = voting_classifier.predict(data_to_predict)
    print('According to lead data, his status is: %d' % (lead_status_value))
    print('[0] unqualified [1] qualified')
    proba = knn.predict_proba([data_to_predict])
    max_proba = max(proba[0])
    print('Proba is: %d%%' %(max_proba*100))
    lead_status_dict = dict()
    dict.update(lead_status_dict, value=str(lead_status_value))
    dict.update(lead_status_dict, proba=str(max_proba))
    return lead_status_dict
test.py (project: Audio-classification-using-Bag-of-Frames-approach, author: amogh3892)
def adaboost_predict(training_samples, training_labels, test_samples, test_labels, n_estimators=50, learning_rate=1.0):
    from sklearn.ensemble import AdaBoostClassifier

    clf = AdaBoostClassifier(n_estimators = n_estimators, learning_rate =learning_rate)

    t0 = time()
    clf.fit(training_samples,training_labels)
    training_time = round(time()-t0, 3)

    t0 = time()
    pred = clf.predict(test_samples)
    test_time = round(time()-t0, 3)

    from sklearn.metrics import accuracy_score

    acc = accuracy_score(test_labels, pred)

    no_features = np.array(training_samples).shape[1]
    training_samples = np.array(training_samples).shape[0]
    test_samples = np.array(test_samples).shape[0]

    with open("Temp\\results.txt","w") as outfile:
        outfile.write("Alogirthm : {}\n".format("Adaboost"))
        outfile.write("Estimators  = {}\n".format(n_estimators))
        outfile.write("Learning rate = {}\n".format(learning_rate))
        outfile.write("No of features : {}\n".format(no_features))
        outfile.write("No of training samples : {}\n".format(training_samples))
        outfile.write("No of test samples : {}\n".format(test_samples))
        outfile.write("Training time : {}\n".format(training_time))
        outfile.write("Test time : {}\n".format(test_time))
        outfile.write("Accuracy : {}\n".format(acc))

    with open("Temp\\result_labels.csv","wb") as outfile:
        np.savetxt(outfile,pred)
ClassificationHmmGeneralize.py (project: AirTicketPredicting, author: junlulocky)
def __init__(self, isTrain):
        super(ClassificationHmmGeneralize, self).__init__(isTrain)
        # data preprocessing
        self.dataPreprocessing()

        self.dt_stump = DecisionTreeClassifier(max_depth=10)
        self.ada = AdaBoostClassifier(
            base_estimator=self.dt_stump,
            learning_rate=1,
            n_estimators=5,
            algorithm="SAMME.R")

        # load the general data
        # feature 0~7: flight number dummy variables
        # feature 8: departure date; feature 9: observed date state;
        # feature 10: minimum price; feature 11: maximum price

        # feature 12: output; feature 13: current price
        # feature 14: flight index
        self.X_general = np.load('inputGeneralClf_HmmParsed/X_train.npy')
        self.y_general = np.load('inputGeneralClf_HmmParsed/y_train.npy')
        self.y_general = self.y_general.reshape((self.y_general.shape[0], 1))
        self.y_general_price = np.load('inputGeneralClf_HmmParsed/y_train_price.npy')
        self.y_general_price = self.y_general_price.reshape((self.y_general_price.shape[0], 1))
        self.y_general_index = np.load('inputGeneralClf_HmmParsed/y_index.npy')
        self.y_general_index = self.y_general_index.reshape((self.y_general_index.shape[0], 1))



        self.routes_general = ["BGY_OTP", # route 1
                "BUD_VKO", # route 2
                "CRL_OTP", # route 3
                "CRL_WAW", # route 4
                "LTN_OTP", # route 5
                "LTN_PRG", # route 6
                "OTP_BGY", # route 7
                "OTP_CRL", # route 8
                "OTP_LTN", # route 9
                "PRG_LTN", # route 10
                "VKO_BUD", # route 11
                "WAW_CRL"] # route 12
ml_framework.py (project: FLASH, author: yuyuz)
def get_data_preprocessor_balancing(params, y):
    d_balancing = params['layer_dict_list'][1]

    if params['balancing'] == str(d_balancing['None']) or params['balancing'] == 'None':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = None
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        params['sample_weight'] = None
    elif params['balancing'] == str(d_balancing['weighting']) or params['balancing'] == 'weighting':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = 'auto'
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        if len(y.shape) > 1:
            offsets = [2 ** i for i in range(y.shape[1])]
            y_ = np.sum(y * offsets, axis=1)
        else:
            y_ = y
        unique, counts = np.unique(y_, return_counts=True)
        cw = 1. / counts
        cw = cw / np.mean(cw)
        sample_weight = np.ones(y_.shape)
        for i, ue in enumerate(unique):
            mask = y_ == ue
            sample_weight[mask] *= cw[i]
        params['sample_weight'] = sample_weight

    return params
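
A self-contained sketch of the weighting branch above: each class receives a weight inversely proportional to its frequency, normalised so the weights average to 1 (the labels are made up):

import numpy as np

y_ = np.array([0, 0, 0, 0, 1, 1, 2])      # hypothetical labels
unique, counts = np.unique(y_, return_counts=True)
cw = 1. / counts                          # rare classes get larger raw weights
cw = cw / np.mean(cw)                     # normalise to mean 1
sample_weight = np.ones(y_.shape)
for i, ue in enumerate(unique):
    sample_weight[y_ == ue] *= cw[i]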
event_filter.py (project: MLAB_Intuit, author: rykard95)
def generate_filter(X_train, y_train):

#    clf = RidgeClassifierCV(alphas=[0.01, 0.1, 1, 10]) 
    clf = RandomForestClassifier(n_jobs=4)
#    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)
    return clf
Adaboost.py (project: Machine-Learning-Tools-on-Iris-Dataset, author: debjitpaul)
def perform_adaboost(self,X_train_std,y_train,X_test_std, y_test): ##perform adaboost

      ada = AdaBoostClassifier(n_estimators=10)
      ada.fit(X_train_std, y_train)
      train_score=cross_val_score(ada,X_train_std, y_train)
      print('The training accuracy is {:.2f}%'.format(train_score.mean()*100))
      test_score=cross_val_score(ada,X_test_std, y_test)
      print('The test accuracy is {:.2f}%'.format(test_score.mean()*100))
      X=X_test_std
      y=y_test
      resolution=0.01
      #Z = svm.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      markers = ('s', 'x', 'o', '^', 'v')
      colors = ('red', 'blue', 'green', 'gray', 'cyan')
      cmap = ListedColormap(colors[:len(np.unique(y_test))])
    # plot the decision surface
      x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
      x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
      xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

      Z = ada.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      Z = Z.reshape(xx1.shape)
      plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
      plt.xlim(xx1.min(), xx1.max())
      plt.ylim(xx2.min(), xx2.max())

      for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.5, c=cmap(idx),
                    marker=markers[idx], label=cl)
      plt.show()
contentView.py (project: OpinionSpam, author: Coder-Yu)
def fitAndPredict(self):
        # classifier = LogisticRegression()
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Logistic:'
        # print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(self.trainingSet, self.trainingLabel)
        pred_labels = {}

        for user in self.testDict:
            pred_labels[user] = classifier.predict([self.model.docvecs[user]])
        # print 'SVM:'
        # print classification_report(self.testLabel, pred_labels)
        return pred_labels

        # classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
        #                                         max_depth=1, random_state=0)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'GBDT:'
        # print classification_report(self.testLabel, pred_labels)
        #
        # clf = AdaBoostClassifier(n_estimators=100)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'AdaBoost:'
        # print classification_report(self.testLabel, pred_labels)
        #
        # clf = RandomForestClassifier(n_estimators=10)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Random Forest:'
        # print classification_report(self.testLabel, pred_labels)
classifier.py (project: stock-price-prediction, author: chinuy)
def performAdaBoostClass(X_train, y_train, X_test, y_test, parameters, savemodel):
    """
    Ada Boosting binary Classification
    """
    # n = parameters[0]
    # l =  parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)

    accuracy = clf.score(X_test, y_test)

    return accuracy
test_elm.py (project: extreme-learning-machines, author: IssamLaradji)
def test_sample_weight_elm():
    """Smoke test - AdaBoostClassifier should work with ELMClassifer."""
    X = Xdigits_binary[:50]
    y = ydigits_binary[:50]

    elm = ELMClassifier(n_hidden=20)
    clf = AdaBoostClassifier(n_estimators=3, base_estimator=elm)
    clf.fit(X, y)
    assert_greater(clf.score(X, y), 0.9)
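
The smoke test works because AdaBoost reweights the training set each round, so any base estimator whose fit accepts sample_weight can be boosted. The same check with a stock scikit-learn estimator, as a sketch:

from sklearn.datasets import load_digits
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

X, y = load_digits(n_class=2, return_X_y=True)
clf = AdaBoostClassifier(n_estimators=3, base_estimator=GaussianNB())
clf.fit(X[:50], y[:50])
print(clf.score(X[:50], y[:50]))  # should be close to 1.0 on this easy two-class split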
SingleClassifier.py (project: intelligentCampus, author: Jackal007)
def getBestOne(self, name):
        # if the classifier has already generated
        try:
            from sklearn.externals import joblib
            clf = joblib.load(name + '.pkl')
            return clf
        except:
            pass

        # if the classifier does not exist,
        # search for the best number of boosting iterations
        bestAccuracyRate, n_estimators, best_clf = 0, 1, None
        for loopTimes in range(2, 200):

            sclf = AdaBoostClassifier(base_estimator=self.clf, learning_rate=1, n_estimators=loopTimes, algorithm='SAMME')

            # hold out 10% of the data to score this candidate
            X_train, X_test, Y_train, Y_test = train_test_split(self.X, self.Y, test_size=0.1, random_state=0)
            sclf.fit(X_train, Y_train)
            accuracyRate = sclf.score(X_test, Y_test)

            if accuracyRate > bestAccuracyRate:
                bestAccuracyRate = accuracyRate
                n_estimators = loopTimes
                best_clf = sclf

        # save the best-scoring classifier as a dump
        joblib.dump(best_clf, name + '.pkl')

        return AdaBoostClassifier(base_estimator=self.clf, learning_rate=1, n_estimators=n_estimators, algorithm='SAMME')
sniffer_classifier.py (project: smart_sniffer, author: ScarWar)
def ada_boost_classifier(self, data, target, learning_rate=1, n_estimators=400, enable_ada=False):
        ada_boost = AdaBoostClassifier(
            base_estimator=self.clf,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            algorithm="SAMME.R")
        ada_boost.fit(data, target)
        if not enable_ada:
            self.clf = ada_boost
        print "AdaBoost training finished"

