Python AdaBoostClassifier() usage examples (source code)

sniffer_classifier.py (project: smart_sniffer, author: ScarWar)
def ada_boost_classifier_err(self, data, target, learning_rate=1, n_estimators=400, show_score=False):
        ada_boost = AdaBoostClassifier(
            base_estimator=self.clf,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            algorithm="SAMME.R")
        ada_boost.fit(data, target)
        score = ada_boost.score(data, target)
        if show_score:
            print("Fitness score: " + str(score))
        return 1.0 - score
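With algorithm="SAMME.R", the base estimator must implement predict_proba, so self.clf here has to be a probabilistic classifier. A minimal free-standing sketch of the same call pattern, assuming a depth-1 decision tree as the weak learner and an older scikit-learn release where the argument is still named base_estimator:

from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, random_state=0)  # toy data (assumed)
ada = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                         learning_rate=1.0, n_estimators=400, algorithm="SAMME.R")
ada.fit(X, y)
print("training error:", 1.0 - ada.score(X, y))  # mirrors the 1.0 - score return above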
enron_poi_ml_ci.py (project: machine-learning, author: cinserra)
def various_classifiers():
    # List of tuples of a classifier and its parameters.
    clf_list = []

    clf_linearsvm = LinearSVC()
    params_linearsvm = {"C": [0.5, 1, 5, 10, 100, 10**10],"tol":[0.1, 0.0000000001],"class_weight":['balanced']}
    clf_list.append( (clf_linearsvm, params_linearsvm) )

    clf_tree = DecisionTreeClassifier()
    params_tree = { "min_samples_split":[2, 5, 10, 20],"criterion": ('gini', 'entropy')}
    clf_list.append( (clf_tree, params_tree) )

    clf_random_tree = RandomForestClassifier()
    params_random_tree = {  "n_estimators":[2, 3, 5],"criterion": ('gini', 'entropy')}
    clf_list.append( (clf_random_tree, params_random_tree) )

    clf_adaboost = AdaBoostClassifier()
    params_adaboost = { "n_estimators":[20, 30, 50, 100]}
    clf_list.append( (clf_adaboost, params_adaboost) )

    clf_knn = KNeighborsClassifier()
    params_knn = {"n_neighbors":[2, 5], "p":[2,3]}
    clf_list.append( (clf_knn, params_knn) )

    clf_log = LogisticRegression()
    params_log = {"C":[0.5, 1, 10, 10**2,10**10, 10**20],"tol":[0.1, 0.00001, 0.0000000001],"class_weight":['balanced']}
    clf_list.append( (clf_log, params_log) )

    clf_lda = LinearDiscriminantAnalysis()
    params_lda = {"n_components":[0, 1, 2, 5, 10]}
    clf_list.append( (clf_lda, params_lda) )

    logistic = LogisticRegression()
    rbm = BernoulliRBM()
    clf_rbm = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])
    params_rbm = {"logistic__tol":[0.0000000001, 10**-20],"logistic__C":[0.05, 1, 10, 10**2,10**10, 10**20],"logistic__class_weight":['balanced'],"rbm__n_components":[2,3,4]}
    clf_list.append( (clf_rbm, params_rbm) )

    return clf_list
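These (classifier, parameter-grid) tuples are shaped for grid search. A minimal sketch of how a caller might consume them; the GridSearchCV loop and the X_train/y_train arrays are assumptions about the surrounding pipeline, not code from this project:

from sklearn.model_selection import GridSearchCV

for clf, params in various_classifiers():
    grid = GridSearchCV(clf, params, cv=3)   # 3-fold CV; scoring left at its default
    grid.fit(X_train, y_train)               # X_train/y_train assumed to exist
    print(type(clf).__name__, grid.best_score_, grid.best_params_)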
User_Interface.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def exportPresentationData(classifier,action):
        dir = input('Give Data Directory: ')

        if int(classifier)==1:
                clf = GradientBoostingClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 2:
                clf = LogisticRegression()
                classify(dir,clf,action)
        elif int(classifier) == 3:
                clf = KNeighborsClassifier(n_neighbors=5)
                classify(dir,clf,action)
        elif int(classifier) == 4:
                clf = DecisionTreeClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 5:
                clf = svm.LinearSVC()
                classify_type2(dir,clf,action)
        elif int(classifier) == 6:
                clf = RandomForestClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 7:
                clf = ExtraTreesClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 8:
                clf = IsolationForest()
                classify_type2(dir,clf,action)
        elif int(classifier) == 9:
                clf = AdaBoostClassifier(n_estimators=100)
                classify(dir,clf,action)
        elif int(classifier) == 10:
                clf = BaggingClassifier(DecisionTreeClassifier())
                classify(dir,clf,action)
        elif int(classifier) == 11:
                clf1 = GradientBoostingClassifier()
                clf2 = AdaBoostClassifier()
                clf = VotingClassifier(estimators=[('gbdt', clf1), ('abdt', clf2)], voting='soft')  # names now match their estimators
                classify(dir,clf,action)
Exporter.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def exportPresentationData(classifier,action,dir):

        if int(classifier)==1:
                clf = GradientBoostingClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 2:
                clf = LogisticRegression()
                classify(dir,clf,action)
        elif int(classifier) == 3:
                clf = KNeighborsClassifier(n_neighbors=5)
                classify(dir,clf,action)
        elif int(classifier) == 4:
                clf = DecisionTreeClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 5:
                clf = svm.LinearSVC()
                classify_type2(dir,clf,action)
        elif int(classifier) == 6:
                clf = RandomForestClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 7:
                clf = ExtraTreesClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 8:
                clf = IsolationForest()
                classify_type2(dir,clf,action)
        elif int(classifier) == 9:
                clf = AdaBoostClassifier(n_estimators=100)
                classify(dir,clf,action)
        elif int(classifier) == 10:
                clf = BaggingClassifier(DecisionTreeClassifier())
                classify(dir,clf,action)
        elif int(classifier) == 11:
                clf1 = GradientBoostingClassifier()
                clf2 = AdaBoostClassifier()
                clf = VotingClassifier(estimators=[('gbdt', clf1), ('abdt', clf2)], voting='soft')  # names now match their estimators
                classify(dir,clf,action)
model_loop.py (project: fake-news-detection, author: aldengolab)
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classifier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),  # max_depth takes a single int; a grid of depths belongs in self.params
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             }
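A sketch of how the two dicts pair up: each key selects a prototype classifier and its grid, which ParameterGrid expands into concrete settings. The loop below is an illustration only; the actual model_loop.py drives the search elsewhere, and X_train/y_train are assumed inputs:

from sklearn.model_selection import ParameterGrid

def iterate_models(clfs, grids, X_train, y_train):
    for name, clf in clfs.items():
        for params in ParameterGrid(grids.get(name, {})):  # {} yields one empty setting
            clf.set_params(**params)    # reuse the prototype with the new parameters
            clf.fit(X_train, y_train)
            yield name, params, clf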
nus_learner_nb_ab.py (project: Image_Retrieval, author: ddlricardo)
def runner(i):
    sem.acquire()
    print("learn begin %s" % i)
    clf = ensemble.AdaBoostClassifier(naive_bayes.GaussianNB())
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    #print("label %s done\n%s"
    # % (i, metrics.classification_report(testlabel[i], result[i])))
    #print metrics.confusion_matrix(testlabel[i], result)
    sem.release()
nus_learner_ls_ab.py (project: Image_Retrieval, author: ddlricardo)
def runner(i):
    sem.acquire()
    print("learn begin %s" % i)
    clf = ensemble.AdaBoostClassifier(svm.LinearSVC(), algorithm="SAMME")  # LinearSVC has no predict_proba, which the default SAMME.R needs at fit time
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    #print("label %s done\n%s"
    # % (i, metrics.classification_report(testlabel[i], result[i])))
    #print metrics.confusion_matrix(testlabel[i], result)
    sem.release()
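Both runner() variants lean on module-level shared state. A hedged sketch of that harness; the names sem, svms, result, and dbresult are inferred from the function bodies, while the concurrency limit and n_labels are hypothetical:

import threading

sem = threading.Semaphore(4)            # bound concurrent fits; the limit is a guess
svms = []                               # (label index, fitted model) pairs
result, dbresult = {}, {}               # per-label probability outputs
threads = [threading.Thread(target=runner, args=(i,)) for i in range(n_labels)]
for t in threads:
    t.start()
for t in threads:
    t.join()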
classifier.py (project: genrec, author: kkanellis)
def __init__(self, genres, data, type='knn', name='', clf_kwargs=None):
        self.logger = get_logger('classifier')
        self.display_name = name

        self.genres = genres
        self.m_genres = { genre:i for i, genre in enumerate(genres) }
        self.randstate = np.random.RandomState()
        self.scaler = StandardScaler()

        clf_kwargs = { } if not clf_kwargs else clf_kwargs
        if type in ['svm', 'mlp']:
            clf_kwargs['random_state'] = self.randstate

        if type == 'knn':
            self.proto_clf = KNeighborsClassifier(**clf_kwargs)
        elif type == 'svm':
            self.proto_clf = SVC(**clf_kwargs)
        elif type == 'dtree':
            self.proto_clf = DecisionTreeClassifier(**clf_kwargs)
        elif type == 'gnb':
            self.proto_clf = GaussianNB(**clf_kwargs)
        elif type == 'perc':
            self.proto_clf = Perceptron(**clf_kwargs)
        elif type == 'mlp':
            self.proto_clf = MLPClassifier(**clf_kwargs)
        elif type == 'ada':
            self.proto_clf = AdaBoostClassifier(**clf_kwargs)
        else:
            raise LookupError('Classifier type "{}" is invalid'.format(type))

        self._convert_data(data)

        self.logger.info('Classifier: {} (params={})'.format(
            self.proto_clf.__class__.__name__,
            clf_kwargs
        ))
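A usage sketch for the 'ada' branch: clf_kwargs is forwarded verbatim to AdaBoostClassifier. Everything below is a placeholder illustration; the genre list, the loader, and the expected shape of data are not shown in this snippet:

dataset = load_features()    # hypothetical loader; _convert_data's input format is not shown
clf = Classifier(
    genres=['rock', 'jazz', 'classical'],    # placeholder genre labels
    data=dataset,
    type='ada',
    name='adaboost-100',
    clf_kwargs={'n_estimators': 100, 'learning_rate': 0.5},   # passed straight through
)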
nlp.py (project: nlp_experiments, author: amirhdrz)
def train_model(text_matrix, categories):
    # model = AdaBoostClassifier(
    #     DecisionTreeClassifier(max_depth=3),
    #     n_estimators=500,
    #     algorithm="SAMME")

    model = RandomForestClassifier(n_estimators=100, max_depth=8)

    model.fit(text_matrix, categories)

    return model
sklearner.py (project: anomalous-vertices-detection, author: Kagandi)
def set_adaboost_classifier(self):
        return SkLearner(ensemble.AdaBoostClassifier())
scikitlearn.py (project: sia-cog, author: deepakkumar1984)
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
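Presumably a companion routine maps these strings back to estimator classes. A hypothetical sketch of such a lookup; the helper name and the reduced mapping are assumptions, not sia-cog's actual code:

from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression

_MODEL_CLASSES = {
    "AdaBoostClassifier": AdaBoostClassifier,
    "RandomForestClassifier": RandomForestClassifier,
    "LogisticRegression": LogisticRegression,
}

def get_model(name, **kwargs):    # hypothetical helper
    return _MODEL_CLASSES[name](**kwargs)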
10.1 Adaboost classifer.py (project: ML-note, author: JasonK93)
def test_AdaBoostClassifier_base_classifier(*data):
    '''
    Test the AdaBoost classifier with different base estimators (the default decision tree vs. Gaussian naive Bayes).
    :param data: train_data, test_data, train_value, test_value
    :return:  None
    '''
    from sklearn.naive_bayes import GaussianNB
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()
    ax=fig.add_subplot(2,1,1)

    clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train,y_train)
    ## graph
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1)
    ax.set_title("AdaBoostClassifier with Decision Tree")

    ax=fig.add_subplot(2,1,2)
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1,base_estimator=GaussianNB())
    clf.fit(X_train,y_train)
    ## graph
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1)
    ax.set_title("AdaBoostClassifier with Gaussian Naive Bayes")
    plt.show()
10.1 Adaboost classifer.py (project: ML-note, author: JasonK93)
def test_AdaBoostClassifier_algorithm(*data):
    '''
    Test performance of the two boosting algorithms ('SAMME' and 'SAMME.R') across several learning rates.
    :param data: train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    algorithms=['SAMME.R','SAMME']
    fig=plt.figure()
    learning_rates=[0.05,0.1,0.5,0.9]
    for i,learning_rate in enumerate(learning_rates):
        ax=fig.add_subplot(2,2,i+1)
        for algorithm in algorithms:
            clf=ensemble.AdaBoostClassifier(learning_rate=learning_rate,
                algorithm=algorithm)
            clf.fit(X_train,y_train)
            ## graph
            estimators_num=len(clf.estimators_)
            X=range(1,estimators_num+1)
            ax.plot(list(X),list(clf.staged_score(X_train,y_train)),
                label="%s:Training score"%algorithm)
            ax.plot(list(X),list(clf.staged_score(X_test,y_test)),
                label="%s:Testing score"%algorithm)
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_title("learing rate:%f"%learning_rate)
    fig.suptitle("AdaBoostClassifier")
    plt.show()
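Both test functions unpack a 4-tuple of train/test splits. A hypothetical driver, assuming scikit-learn's digits dataset (any classification dataset with the same split contract would do):

from sklearn import datasets, model_selection

def load_data_classification():    # hypothetical loader, matching the *data contract
    digits = datasets.load_digits()
    return model_selection.train_test_split(digits.data, digits.target,
                                            test_size=0.25, random_state=0,
                                            stratify=digits.target)

test_AdaBoostClassifier_base_classifier(*load_data_classification())
test_AdaBoostClassifier_algorithm(*load_data_classification())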
ensemble_classifier.py (project: sport-news-retrieval, author: Andyccs)
def ensemble_classify():
  label_list = get_labels()
  tweet_list = get_labelled_tweets()
  # vectorise using tf-idf
  vectoriser = TfidfVectorizer(min_df=3,
                               max_features=None,
                               strip_accents='unicode',
                               analyzer='word',
                               token_pattern=r'\w{1,}',
                               ngram_range=(1, 2),
                               use_idf=1,
                               smooth_idf=1,
                               sublinear_tf=1,)

  ## do transformation into vector
  vectoriser.fit(tweet_list)
  vectorised_tweet_list = vectoriser.transform(tweet_list)
  train_vector, test_vector, train_labels, test_labels = train_test_split(vectorised_tweet_list,
                                                                          label_list,
                                                                          test_size=0.8,
                                                                          random_state=42)

  n_estimators = 10  # number of weak learners
  model = AdaBoostClassifier(n_estimators=n_estimators)
  ada_classifier = model.fit(train_vector, train_labels)
  result = ada_classifier.predict(test_vector)

  # output result to csv
  create_directory('data')
  result.tofile("data/tfidf_ada.csv", sep=',')
  save_model(ada_classifier, 'tfidf_ada')

  # evaluation
  binarise_result = label_binarize(result, classes=class_list)
  binarise_labels = label_binarize(test_labels, classes=class_list)
  generate_eval_metrics(binarise_result, 'tfidf_ada', binarise_labels)
test_weight_boosting.py (project: Parallel-SGD, author: angadgill)
def test_classification_toy():
    # Check classification on a toy dataset.
    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg, random_state=0)
        clf.fit(X, y_class)
        assert_array_equal(clf.predict(T), y_t_class)
        assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_)
        assert_equal(clf.predict_proba(T).shape, (len(T), 2))
        assert_equal(clf.decision_function(T).shape, (len(T),))
test_weight_boosting.py (project: Parallel-SGD, author: angadgill)
def test_iris():
    # Check consistency on dataset iris.
    classes = np.unique(iris.target)
    clf_samme = prob_samme = None

    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg)
        clf.fit(iris.data, iris.target)

        assert_array_equal(classes, clf.classes_)
        proba = clf.predict_proba(iris.data)
        if alg == "SAMME":
            clf_samme = clf
            prob_samme = proba
        assert_equal(proba.shape[1], len(classes))
        assert_equal(clf.decision_function(iris.data).shape[1], len(classes))

        score = clf.score(iris.data, iris.target)
        assert score > 0.9, "Failed with algorithm %s and score = %f" % \
            (alg, score)

    # Somewhat hacky regression test: prior to
    # ae7adc880d624615a34bafdb1d75ef67051b8200,
    # predict_proba returned SAMME.R values for SAMME.
    clf_samme.algorithm = "SAMME.R"
    assert_array_less(0,
                      np.abs(clf_samme.predict_proba(iris.data) - prob_samme))
test_weight_boosting.py (project: Parallel-SGD, author: angadgill)
def test_pickle():
    # Check pickability.
    import pickle

    # Adaboost classifier
    for alg in ['SAMME', 'SAMME.R']:
        obj = AdaBoostClassifier(algorithm=alg)
        obj.fit(iris.data, iris.target)
        score = obj.score(iris.data, iris.target)
        s = pickle.dumps(obj)

        obj2 = pickle.loads(s)
        assert_equal(type(obj2), obj.__class__)
        score2 = obj2.score(iris.data, iris.target)
        assert_equal(score, score2)

    # Adaboost regressor
    obj = AdaBoostRegressor(random_state=0)
    obj.fit(boston.data, boston.target)
    score = obj.score(boston.data, boston.target)
    s = pickle.dumps(obj)

    obj2 = pickle.loads(s)
    assert_equal(type(obj2), obj.__class__)
    score2 = obj2.score(boston.data, boston.target)
    assert_equal(score, score2)
test_weight_boosting.py (project: Parallel-SGD, author: angadgill)
def test_error():
    # Test that it gives proper exception on deficient input.
    assert_raises(ValueError,
                  AdaBoostClassifier(learning_rate=-1).fit,
                  X, y_class)

    assert_raises(ValueError,
                  AdaBoostClassifier(algorithm="foo").fit,
                  X, y_class)

    assert_raises(ValueError,
                  AdaBoostClassifier().fit,
                  X, y_class, sample_weight=np.asarray([-1]))
test_weight_boosting.py (project: Parallel-SGD, author: angadgill)
def test_base_estimator():
    # Test different base estimators.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC

    # XXX doesn't work with y_class because RF doesn't support classes_
    # Shouldn't AdaBoost run a LabelBinarizer?
    clf = AdaBoostClassifier(RandomForestClassifier())
    clf.fit(X, y_regr)

    clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
    clf.fit(X, y_class)

    from sklearn.ensemble import RandomForestRegressor
    from sklearn.svm import SVR

    clf = AdaBoostRegressor(RandomForestRegressor(), random_state=0)
    clf.fit(X, y_regr)

    clf = AdaBoostRegressor(SVR(), random_state=0)
    clf.fit(X, y_regr)

    # Check that an empty discrete ensemble fails in fit, not predict.
    X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]]
    y_fail = ["foo", "bar", 1, 2]
    clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
    assert_raises_regexp(ValueError, "worse than random",
                         clf.fit, X_fail, y_fail)
test_weight_boosting.py (project: Parallel-SGD, author: angadgill)
def test_sample_weight_missing():
    from sklearn.linear_model import LogisticRegression
    from sklearn.cluster import KMeans

    clf = AdaBoostClassifier(KMeans(), algorithm="SAMME")
    assert_raises(ValueError, clf.fit, X, y_regr)

    clf = AdaBoostRegressor(KMeans())
    assert_raises(ValueError, clf.fit, X, y_regr)

