python类VotingClassifier()的实例源码-面圈网

p-final.py 文件源码项目：Stock-Market-Prediction 作者: Diptiranjan1 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def do_ml(ticker):
    """Fit a voting ensemble for ``ticker`` and report its hold-out accuracy.

    Features and labels come from ``extract_featuresets``; 25% of the samples
    are held out for evaluation.  The accuracy and the distribution of the
    predicted classes are printed, and the accuracy is returned.
    """
    features, labels, _ = extract_featuresets(ticker)

    split = cross_validation.train_test_split(features, labels, test_size=0.25)
    X_train, X_test, y_train, y_test = split

    # Three different base learners combined with VotingClassifier's
    # default voting mode.
    base_learners = [
        ('lsvc', svm.LinearSVC()),
        ('knn', neighbors.KNeighborsClassifier()),
        ('rfor', RandomForestClassifier()),
    ]
    clf = VotingClassifier(base_learners)
    clf.fit(X_train, y_train)

    confidence = clf.score(X_test, y_test)
    print('accuracy:', confidence)
    print('predicted class counts:', Counter(clf.predict(X_test)))
    print()
    print()
    return confidence

# examples of running:

article_learner.py 文件源码项目：fake_news 作者: bmassman 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def article_trainers(articles: ArticleDB):
    """
    Train a series of classifiers on the article db, then train a soft-voting
    ensemble built from the first four of them, to predict validity scores
    for articles.
    """
    candidates = (
        (DecisionTreeClassifier, {}),
        (RandomForestClassifier, {}),
        (LogisticRegression, {'C': [0.01, 0.1, 1, 10, 100]}),
        (MultinomialNB, {'alpha': [0.1, 1.0, 10.0, 100.0]}),
        (LinearSVC, {'C': [0.01, 0.1, 1, 10, 100]}),
    )
    fitted_models = (
        train_model(articles, estimator_cls, grid, probabilities=True)
        for estimator_cls, grid in candidates
    )
    trained_models = [(str(fitted), fitted) for fitted in fitted_models]

    # Only the first four results join the ensemble; the fifth (LinearSVC)
    # is trained above but left out of the vote.
    ensemble_learner = VotingClassifier(estimators=trained_models[:4],
                                        voting='soft')
    train_model(articles, ensemble_learner, {})

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def test_estimator_init():
    """Invalid constructor arguments must be reported when ``fit`` is called."""
    # Case 1: an empty estimator list.
    no_members = VotingClassifier(estimators=[])
    expected = ('Invalid `estimators` attribute, `estimators` should be'
                ' a list of (string, estimator) tuples')
    assert_raise_message(AttributeError, expected, no_members.fit, X, y)

    lr = LogisticRegression(random_state=1)

    # Case 2: a voting mode that is neither 'soft' nor 'hard'.
    bad_voting = VotingClassifier(estimators=[('lr', lr)], voting='error')
    expected = "Voting must be 'soft' or 'hard'; got (voting='error')"
    assert_raise_message(ValueError, expected, bad_voting.fit, X, y)

    # Case 3: more weights than estimators.
    bad_weights = VotingClassifier(estimators=[('lr', lr)], weights=[1, 2])
    expected = ('Number of classifiers and weights must be equal'
                '; got 2 weights, 1 estimators')
    assert_raise_message(ValueError, expected, bad_weights.fit, X, y)

__init__.py 文件源码项目：marconibot 作者: s4w3d0ff 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def __init__(self, api, lobes=False):
        """
        Store the api handle and build the hard-voting "lobe" classifier.

        api = kept on the instance as ``self.api``
        lobes = a dict mapping names to classifiers for the VotingClassifier;
            when falsy, a RandomForestClassifier and a DecisionTreeClassifier
            are used instead
        """
        self.api = api
        if not lobes:
            lobes = {
                'rf': RandomForestClassifier(n_estimators=7, random_state=666),
                'dt': DecisionTreeClassifier(),
            }
        # VotingClassifier expects (name, estimator) pairs.
        named_lobes = [(name, lobes[name]) for name in lobes]
        self.lobe = VotingClassifier(estimators=named_lobes,
                                     voting='hard',
                                     n_jobs=-1)
        self._trained = False
        self.split = splitTrainTestData
        self.prep = prepDataframe

_classif.py 文件源码项目：brainpipe 作者: EtienneCmb 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def _voting(estimators, **kwargs):
        """Build a VotingClassifier (n_jobs=1) from ``estimators``.

        Each estimator is registered under its ``shStr``.  The returned
        classifier gets ``lgStr`` and ``shStr`` attributes made of the
        members' values joined with ' + '.  Extra keyword arguments are
        forwarded to VotingClassifier.
        """
        named = [(member.shStr, member) for member in estimators]
        clfObj = VotingClassifier(named, n_jobs=1, **kwargs)
        for attr in ('lgStr', 'shStr'):
            joined = ' + '.join([getattr(member, attr) for member in estimators])
            setattr(clfObj, attr, joined)
        return clfObj

service.py 文件源码项目：intellead-classification 作者: intellead 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def classification(lead):
    """Predict the status of ``lead`` with a 7-nearest-neighbours classifier.

    The data set is loaded from the database (inputs restricted to the keys
    of ``lead``), split with ``test_size=0.3`` and ``random_state=42``, and a
    KNeighborsClassifier is fitted on the training part.  Sizes, accuracy and
    the prediction are printed along the way.

    Returns a dict holding the predicted status under ``'value'`` and the
    highest class probability under ``'proba'``, both converted to ``str``.
    """
    # Disabled VotingClassifier alternative, kept for reference:
    #classifiers = [
    #    ('ab', AdaBoostClassifier()),
    #    ('dt', DecisionTreeClassifier(max_depth=5)),
    #    ('kn', KNeighborsClassifier(16)),
    #]
    #voting_classifier = VotingClassifier(estimators=classifiers, voting='hard')
    #voting_classifier = voting_classifier.fit(inputs_training, np.ravel(outputs_training))
    #lead_status = voting_classifier.predict(data_to_predict)
    inputs = get_dataset_input_from_database(lead.keys())
    outputs = get_dataset_output_from_database()
    print('The total number of examples in the dataset is: %d' % (len(inputs)))

    inputs_training, inputs_test, outputs_training, outputs_test = train_test_split(
        inputs, outputs, test_size=0.3, random_state=42)
    print('The number of examples used for training are: %d' % (len(inputs_training)))
    print('The number of examples used for testing are: %d' % (len(inputs_test)))

    knn = KNeighborsClassifier(n_neighbors=7, p=2)
    knn.fit(inputs_training, np.ravel(outputs_training))
    accuracy_pct = knn.score(inputs_test, outputs_test) * 100
    print('[K=7] The probability of the algorithm to be right is: %f%%' % (accuracy_pct))

    print('Lead data:')
    print(lead)
    data_to_predict = convert_dict_to_tuple(lead)
    print('Lead data to predict:')
    print(data_to_predict)

    lead_status_value = knn.predict(data_to_predict)[0]
    print('According to lead data, his status is: %d' % (lead_status_value))
    print('[0] unqualified [1] qualified')

    max_proba = max(knn.predict_proba(data_to_predict)[0])
    print('Proba is: %d%%' %(max_proba*100))

    return {'value': str(lead_status_value), 'proba': str(max_proba)}

senti_models.py 文件源码项目：senti 作者: stevenxxiu 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def fit_voting(self):
        """Assemble a weighted VotingClassifier from externally stored model results.

        For every entry in ``names`` an ``ExternalModel`` is created that maps
        the validation and test documents to JSON result files under
        ``self.data_dir``.  Voting weights are then searched with
        ``basinhopping`` so as to maximise the number of validation labels
        that the weighted score sum predicts correctly.

        Returns a ``(name, estimator)`` tuple where ``name`` is
        ``'vote(<comma-joined names>)'`` and ``estimator`` is the
        VotingClassifier with ``le_`` and ``estimators_`` assigned by hand
        (``fit`` is not called here).
        """
        # Voting mode; with 'soft' the Binarizer branch below is skipped.
        voting = 'soft'
        # Active member models; the commented-out entries are currently disabled.
        names = [
            # 'svm(word_n_grams,char_n_grams,all_caps,hashtags,punctuations,punctuation_last,emoticons,emoticon_last,'
            # 'elongated,negation_count)',
            # 'logreg(w2v_doc)',
            # 'logreg(w2v_word_avg_google)',
            'word2vec_bayes',
            'cnn_word(embedding=google)',
            'rnn_word(embedding=google)',
        ]
        # One ExternalModel per name, keyed by the val/test document objects.
        classifiers = [ExternalModel({
            self.val_docs: os.path.join(self.data_dir, 'results/val/{}.json'.format(name)),
            self.test_docs: os.path.join(self.data_dir, 'results/test/{}.json'.format(name)),
        }) for name in names]
        # Collect each model's validation scores.
        all_scores = []
        for classifier in classifiers:
            scores = classifier.predict_proba(self.val_docs)
            if voting == 'hard':
                # Threshold the scores at 1/3 when hard voting is requested.
                scores = Binarizer(1 / 3).transform(scores)
            all_scores.append(scores)
        # Stacked per-model scores; presumably indexed (model, document, class),
        # judging by the reshape/argmax below -- TODO confirm.
        all_scores = np.array(all_scores)
        all_scores_first, all_scores_rest = all_scores[0], all_scores[1:]
        le = LabelEncoder().fit(self.classes_)
        val_label_indexes = le.transform(self.val_labels())
        # assume w_0=1 as w is invariant to scaling
        # Objective: negative count of validation documents whose argmax over
        # the weighted score sum equals the encoded label.  Weights start at 1
        # and are bounded below by 0.
        # NOTE(review): all_scores_first is broadcast onto every weighted
        # slice before the axis-0 sum, so it appears to be counted len(w_)
        # times rather than once -- confirm this is intended.
        w = basinhopping(
            lambda w_: -(val_label_indexes == np.argmax((
                all_scores_first + all_scores_rest * w_.reshape((len(w_), 1, 1))
            ).sum(axis=0), axis=1)).sum(), np.ones(len(classifiers) - 1), niter=1000,
            minimizer_kwargs=dict(method='L-BFGS-B', bounds=[(0, None)] * (len(classifiers) - 1))
        ).x
        # Prepend the fixed first weight and normalise so the weights sum to 1.
        w = np.hstack([[1], w])
        w /= w.sum()
        logging.info('w: {}'.format(w))
        estimator = VotingClassifier(list(zip(names, classifiers)), voting=voting, weights=w)
        # Set the attributes directly instead of calling fit().
        estimator.le_ = le
        estimator.estimators_ = classifiers
        return 'vote({})'.format(','.join(names)), estimator

gesture_recognizer.py 文件源码项目：Sign-Language-Recognition 作者: achyudhk 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def train_gesture_classifier(userlist, foldername):
    """
    Train a soft-voting (linear SVC + random forest) gesture classifier.

    :param userlist: names of the per-user sub-folders to read
    :param foldername: path prefix to which each user name is appended
    :return: the fitted VotingClassifier
    """
    # Class index per gesture letter: 'A'..'Y' without 'J', numbered 0..23.
    class_alpha_dict = {letter: index
                        for index, letter in enumerate('ABCDEFGHIKLMNOPQRSTUVWXY')}
    crop_columns = ('top_left_x', 'top_left_y', 'bottom_right_x', 'bottom_right_y')
    work_arr = []

    print("Generating training features for gesture classifier...")
    for user in userlist:
        user_dir = foldername + user + '/'
        crop_df = pd.read_csv(user_dir + user + '_loc.csv', index_col=0, header=0)
        jpg_names = [entry for entry in os.listdir(user_dir) if entry.endswith('.jpg')]
        for filename in jpg_names:
            img_arr = imread(user_dir + filename, as_grey=True)
            # Crop rows are indexed by "<user>/<filename>".
            row_key = user + '/' + filename
            crop_box = tuple(crop_df.loc[row_key, column] for column in crop_columns)
            # The first character of the file name is the gesture letter.
            work_arr.append((img_arr,) + crop_box + (class_alpha_dict[filename[0]],))

    samples = list(map(generate_training_set, work_arr))
    del work_arr
    print("Garbage collector deleted objects:", gc.collect())

    # Each sample is indexed as [0] -> features, [1] -> label.
    random.shuffle(samples)
    y_train = [sample[1] for sample in samples]
    x_train = [sample[0] for sample in samples]
    print("Size of gesture classifier training set:", len(y_train))

    svc_classifier = SVC(cache_size=6000, kernel='linear', tol=1e-3, decision_function_shape='ovr', C=1, probability=True)
    rfc_classifier = RandomForestClassifier(n_estimators=500, max_features='sqrt', n_jobs=8, warm_start=False)
    members = [('sv', svc_classifier), ('rf1', rfc_classifier)]
    voting_classifier = VotingClassifier(estimators=members, voting='soft')
    voting_classifier.fit(x_train, y_train)
    print("Gesture classifier training complete.")

    return voting_classifier

gesture_classifier.py 文件源码项目：Sign-Language-Recognition 作者: achyudhk 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def train_gesture_classifier(userlist, foldername="data/"):
    """
    Train a soft-voting (linear SVC + random forest) gesture classifier.

    Feature extraction is distributed over a pool of 8 workers.

    :param userlist: names of the per-user sub-folders to read
    :param foldername: path prefix to which each user name is appended
    :return: the fitted VotingClassifier
    """
    work_arr = list()
    print("Generating training features for gesture classifier...")
    for i0 in userlist:
        current_folder = foldername + i0 + '/'
        crop_df = pd.read_csv(current_folder + i0 + '_loc.csv', index_col=0, header=0)
        filelist = [x for x in listdir(current_folder) if x.endswith('.jpg')]
        for filename in filelist:
            img_arr = io.imread(current_folder + filename, as_grey=True)
            # Crop rows are indexed by "<user>/<filename>".
            row_key = i0 + '/' + filename
            crop_before_x = crop_df.loc[row_key, 'top_left_x']
            crop_before_y = crop_df.loc[row_key, 'top_left_y']
            crop_after_x = crop_df.loc[row_key, 'bottom_right_x']
            crop_after_y = crop_df.loc[row_key, 'bottom_right_y']
            # The first character of the file name selects the class label.
            work_arr.append((img_arr, crop_before_x, crop_before_y, crop_after_x, crop_after_y, class_alpha_dict[filename[0]]))

    thread_pool = Pool(8)
    try:
        x_train = thread_pool.map(generate_training_set, work_arr)
    finally:
        # Always release the workers, even when feature extraction fails,
        # and wait for them to exit so none are left behind.
        thread_pool.close()
        thread_pool.join()
    del work_arr
    print("Garbage collector deleted objects:", gc.collect())
    # Each sample is indexed as [0] -> features, [1] -> label.
    random.shuffle(x_train)
    y_train = [x[1] for x in x_train]
    x_train = [x[0] for x in x_train]
    print("Size of gesture classifier training set:", len(y_train))

    rfc_classifier = RandomForestClassifier(n_estimators=500, max_features='sqrt', n_jobs=8, warm_start=False)
    svc_classifier = SVC(cache_size=6000, kernel='linear', tol=1e-3, decision_function_shape='ovr', C=1, probability=True)
    voting_classifier = VotingClassifier(estimators=[('sv', svc_classifier), ('rf1', rfc_classifier)], voting='soft')
    voting_classifier.fit(x_train, y_train)
    print("Gesture classifier training complete.")

    return voting_classifier

User_Interface.py 文件源码项目：yttresearch-machine-learning-algorithms-analysis 作者: gdemos01 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def exportPresentationData(classifier,action):
        """Ask for a data directory and run the classifier selected by number.

        ``classifier`` is converted with ``int`` and selects one of eleven
        models (1-11); any other number does nothing after the prompt.
        Choices 5 and 8 are run through ``classify_type2``, all others
        through ``classify``.
        """
        data_dir = input('Give Data Directory: ')
        choice = int(classifier)

        if choice == 1:
            clf = GradientBoostingClassifier()
        elif choice == 2:
            clf = LogisticRegression()
        elif choice == 3:
            clf = KNeighborsClassifier(n_neighbors=5)
        elif choice == 4:
            clf = DecisionTreeClassifier()
        elif choice == 5:
            clf = svm.LinearSVC()
        elif choice == 6:
            clf = RandomForestClassifier()
        elif choice == 7:
            clf = ExtraTreesClassifier()
        elif choice == 8:
            clf = IsolationForest()
        elif choice == 9:
            clf = AdaBoostClassifier(n_estimators=100)
        elif choice == 10:
            clf = BaggingClassifier(DecisionTreeClassifier())
        elif choice == 11:
            # NOTE(review): the labels look swapped ('abdt' names the gradient
            # boosting model, 'gbdt' the AdaBoost one); kept as they are.
            clf1 = GradientBoostingClassifier()
            clf2 = AdaBoostClassifier()
            clf = VotingClassifier(estimators=[('abdt', clf1), ('gbdt', clf2)], voting='soft')
        else:
            return

        run = classify_type2 if choice in (5, 8) else classify
        run(data_dir, clf, action)

Exporter.py 文件源码项目：yttresearch-machine-learning-algorithms-analysis 作者: gdemos01 项目源码文件源码阅读 53 收藏 0 点赞 0 评论 0

def exportPresentationData(classifier,action,dir):
        """Run the classifier selected by number on the data in ``dir``.

        ``classifier`` is converted with ``int`` and selects one of eleven
        models (1-11); any other number does nothing.  Choices 5 and 8 are
        run through ``classify_type2``, all others through ``classify``.
        """
        choice = int(classifier)

        if choice == 1:
            clf = GradientBoostingClassifier()
        elif choice == 2:
            clf = LogisticRegression()
        elif choice == 3:
            clf = KNeighborsClassifier(n_neighbors=5)
        elif choice == 4:
            clf = DecisionTreeClassifier()
        elif choice == 5:
            clf = svm.LinearSVC()
        elif choice == 6:
            clf = RandomForestClassifier()
        elif choice == 7:
            clf = ExtraTreesClassifier()
        elif choice == 8:
            clf = IsolationForest()
        elif choice == 9:
            clf = AdaBoostClassifier(n_estimators=100)
        elif choice == 10:
            clf = BaggingClassifier(DecisionTreeClassifier())
        elif choice == 11:
            # NOTE(review): the labels look swapped ('abdt' names the gradient
            # boosting model, 'gbdt' the AdaBoost one); kept as they are.
            clf1 = GradientBoostingClassifier()
            clf2 = AdaBoostClassifier()
            clf = VotingClassifier(estimators=[('abdt', clf1), ('gbdt', clf2)], voting='soft')
        else:
            return

        run = classify_type2 if choice in (5, 8) else classify
        run(dir, clf, action)

optimize_model_ensemble.py 文件源码项目：BotBoosted 作者: brityboy 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def create_voting_classifier_ensemble(model_tuple_list):
    '''
    INPUT
         - model_tuple_list: list of model tuples (name, model)
    OUTPUT
         - the fitted ensemble

    Build a soft-voting ensemble from the given models and fit it on the
    module-level X_train_b / y_train_b data.
    '''
    voter = VotingClassifier(model_tuple_list, voting='soft')
    voter.fit(X_train_b, y_train_b)
    return voter

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def test_predictproba_hardvoting():
    """Accessing ``predict_proba`` on a hard-voting ensemble must fail."""
    members = [('lr1', LogisticRegression()),
               ('lr2', LogisticRegression())]
    hard_voter = VotingClassifier(estimators=members, voting='hard')
    expected = "predict_proba is not available when voting='hard'"
    assert_raise_message(AttributeError, expected,
                         hard_voter.predict_proba, X)

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def test_notfitted():
    """``predict_proba`` on an unfitted ensemble must raise NotFittedError."""
    members = [('lr1', LogisticRegression()),
               ('lr2', LogisticRegression())]
    unfitted = VotingClassifier(estimators=members, voting='soft')
    expected = ("This VotingClassifier instance is not fitted yet. Call \'fit\'"
                " with appropriate arguments before using this method.")
    assert_raise_message(NotFittedError, expected,
                         unfitted.predict_proba, X)

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def test_majority_label_iris():
    """Check classification by majority label on dataset iris."""
    members = [
        ('lr', LogisticRegression(random_state=123)),
        ('rf', RandomForestClassifier(random_state=123)),
        ('gnb', GaussianNB()),
    ]
    eclf = VotingClassifier(estimators=members, voting='hard')
    # Mean 5-fold accuracy must match 0.95 to two decimals.
    fold_scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(fold_scores.mean(), 0.95, decimal=2)

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 39 收藏 0 点赞 0 评论 0

def test_tie_situation():
    """Check voting classifier selects smaller class label in tie situation."""
    lr = LogisticRegression(random_state=123)
    rf = RandomForestClassifier(random_state=123)
    eclf = VotingClassifier(estimators=[('lr', lr), ('rf', rf)],
                            voting='hard')
    # Sample 73 is where the two members disagree (2 vs 1).
    tied = 73
    assert_equal(lr.fit(X, y).predict(X)[tied], 2)
    assert_equal(rf.fit(X, y).predict(X)[tied], 1)
    assert_equal(eclf.fit(X, y).predict(X)[tied], 1)

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def test_predict_on_toy_problem():
    """Manually check predicted class labels for toy dataset.

    Every individual classifier, the hard-voting ensemble and the
    soft-voting ensemble must reproduce the training labels exactly.
    """
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()

    X = np.array([[-1.1, -1.5],
                  [-1.2, -1.4],
                  [-3.4, -2.2],
                  [1.1, 1.2],
                  [2.1, 1.4],
                  [3.1, 2.3]])

    y = np.array([1, 1, 1, 2, 2, 2])
    expected = [1, 1, 1, 2, 2, 2]

    # Compare the full label sequences.  Wrapping both sides in all() would
    # reduce each to True (every label is non-zero) and check nothing.
    assert_equal(clf1.fit(X, y).predict(X).tolist(), expected)
    assert_equal(clf2.fit(X, y).predict(X).tolist(), expected)
    assert_equal(clf3.fit(X, y).predict(X).tolist(), expected)

    eclf = VotingClassifier(estimators=[
                            ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                            voting='hard',
                            weights=[1, 1, 1])
    assert_equal(eclf.fit(X, y).predict(X).tolist(), expected)

    eclf = VotingClassifier(estimators=[
                            ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                            voting='soft',
                            weights=[1, 1, 1])
    assert_equal(eclf.fit(X, y).predict(X).tolist(), expected)

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def test_multilabel():
    """Fit on multilabel data; a NotImplementedError from fit is tolerated."""
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          random_state=123)
    one_vs_rest = OneVsRestClassifier(SVC(kernel='linear'))
    eclf = VotingClassifier(estimators=[('ovr', one_vs_rest)], voting='hard')

    try:
        eclf.fit(X, y)
    except NotImplementedError:
        # Multilabel input being rejected is an accepted outcome.
        pass

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def test_gridsearch():
    """Check GridSearch support."""
    members = [
        ('lr', LogisticRegression(random_state=1)),
        ('rf', RandomForestClassifier(random_state=1)),
        ('gnb', GaussianNB()),
    ]
    eclf = VotingClassifier(estimators=members, voting='soft')

    # Search over a nested member parameter as well as the ensemble's own.
    param_grid = {
        'lr__C': [1.0, 100.0],
        'voting': ['soft', 'hard'],
        'weights': [[0.5, 0.5, 0.5], [1.0, 0.5, 0.5]],
    }

    search = GridSearchCV(estimator=eclf, param_grid=param_grid, cv=5)
    search.fit(iris.data, iris.target)

MachineLearning.py 文件源码项目：DiseaseModeling 作者: slerman12 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def ensemble(algs, alg_names, ensemble_name=None, in_ensemble=None, weights=None, voting="soft"):
    """Build a VotingClassifier from the selected algorithms and name it.

    :param algs: candidate estimators
    :param alg_names: display names, parallel to ``algs``
    :param ensemble_name: explicit name; when None a name is generated from
        the selected algorithm names
    :param in_ensemble: optional flags, parallel to ``algs``; an algorithm is
        included when its flag is truthy (all are included when None)
    :param weights: passed through to VotingClassifier unchanged; it is not
        filtered by ``in_ensemble``
    :param voting: voting mode passed to VotingClassifier
    :return: dict with the classifier under "alg" and its name under "name"
    """
    # Estimators for the ensemble, as (name, estimator) pairs
    estimators = [
        (alg_names[index], alg)
        for index, alg in enumerate(algs)
        if in_ensemble is None or in_ensemble[index]
    ]

    if ensemble_name is not None:
        # Use provided name if not none
        name = ensemble_name
    else:
        prefix = "Weighted Ensemble of " if weights is not None else "Ensemble of "
        # join() leaves the prefix intact when nothing is selected; slicing
        # off a trailing ", " would cut into the prefix itself.
        name = prefix + ", ".join(label for label, _ in estimators)

    # Create ensemble
    voter = VotingClassifier(estimators=estimators, voting=voting, weights=weights)

    # Return ensemble and name
    return {"alg": voter, "name": name}

ScikitLearners.py 文件源码项目：Aion 作者: aleisalem 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def predictAndTestEnsemble(X, y, Xtest, ytest, classifiers=[], selectKBest=0):
    """
    Trains a hard-voting ensemble of classifiers (with default params) on a training dataset,
    and returns its predictions for that same training dataset and for an out-of-sample test dataset
    :param X: The matrix of training feature vectors
    :type X: list
    :param y: The labels corresponding to the training feature vectors
    :type y: list
    :param Xtest: The matrix of test feature vectors
    :type Xtest: list
    :param ytest: The labels corresponding to the test feature vectors (converted to an array but not used for fitting)
    :type ytest: list
    :param classifiers: Names of the classifiers to use in the ensemble: a name containing "knn" or "forest"
        must end in "-<int>" (number of neighbours / trees); a name containing "svm" selects a linear SVC
    :type classifiers: list of str
    :param selectKBest: The number of best features to select (0 disables feature selection)
    :type selectKBest: int
    :return: Two sequences: the labels predicted for X and for Xtest; two empty lists if anything fails
    """
    # NOTE: the mutable default for "classifiers" is kept for interface
    # compatibility; it is only iterated, never mutated.
    try:
        predicted, predicted_test = [], []
        # Prepare the data
        X, y, Xtest, ytest = numpy.array(X), numpy.array(y), numpy.array(Xtest), numpy.array(ytest)
        # Define classifiers
        ensembleClassifiers = []
        for c in classifiers:
            kind = c.lower()
            if "knn" in kind:
                K = int(c.split('-')[-1])
                clf = neighbors.KNeighborsClassifier(n_neighbors=K)
            elif "svm" in kind:
                clf = svm.SVC(kernel='linear', C=1)
            elif "forest" in kind:
                E = int(c.split('-')[-1])
                clf = ensemble.RandomForestClassifier(n_estimators=E,)
            else:
                # Fail explicitly instead of silently reusing the classifier
                # built for the previous name (or hitting an unbound "clf").
                raise ValueError("Unsupported classifier \"%s\"" % c)
            # Add to list
            ensembleClassifiers.append((c, clf))
        # Select K Best features if applicable. The selector is fitted on the
        # training data only and then applied to both sets, so that both end
        # up with the same feature columns.
        if selectKBest > 0:
            selector = SelectKBest(chi2, k=selectKBest).fit(X, y)
            X_new, Xtest_new = selector.transform(X), selector.transform(Xtest)
        else:
            X_new, Xtest_new = X, Xtest
        # Train and fit the voting classifier
        voting = VotingClassifier(estimators=ensembleClassifiers, voting='hard')
        prettyPrint("Fitting ensemble model")
        voting = voting.fit(X_new, y)
        prettyPrint("Validating model")
        predicted = voting.predict(X_new)
        # Same for the test dataset
        prettyPrint("Testing the model")
        predicted_test = voting.predict(Xtest_new)

    except Exception as e:
        # Best-effort contract: report the error and hand back empty results.
        prettyPrintError(e)
        return [], []

    return predicted, predicted_test

models.py 文件源码项目：syracuse_public 作者: dssg 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def define_model(modelname):
    """
    Return a new, unfitted classifier for the given model code

    Input
    ----
    modelname: str
       model code, one of 'LR', 'NN', 'DT', 'RF', 'NB', 'SVM', 'ET', 'SGD',
       'AB', 'GB', 'VC', 'VC2'

    Output
    ------
    clf: model object
       Model Object Classifier

    Raises
    ------
    ConfigError
       if ``modelname`` is not one of the codes above

    """
    if modelname == 'LR':
        return linear_model.LogisticRegression()
    elif modelname == 'NN':
        return neighbors.KNeighborsClassifier()
    elif modelname == 'DT':
        return tree.DecisionTreeClassifier()
    elif modelname == 'RF':
        return ensemble.RandomForestClassifier()
    elif modelname == 'NB':
        return naive_bayes.GaussianNB()
    elif modelname == 'SVM':
        return svm.SVC()
    elif modelname == 'ET':
        return ensemble.ExtraTreesClassifier()
    elif modelname == 'SGD':
        return linear_model.SGDClassifier()
    elif modelname == 'AB':
        # AdaBoost over depth-1 decision trees
        return ensemble.AdaBoostClassifier(
            tree.DecisionTreeClassifier(max_depth=1)
        )
    elif modelname == 'GB':
        return ensemble.GradientBoostingClassifier()
    elif modelname == 'VC':
        # Soft vote of random forest, extra trees and AdaBoost
        return ensemble.VotingClassifier(estimators=[
            ('RFC', ensemble.RandomForestClassifier(
                n_estimators=10, max_depth=None, min_samples_split=1,
                random_state=0)),
            ('ETC', ensemble.ExtraTreesClassifier(
                max_depth=None, max_features=5, n_estimators=10,
                random_state=0, min_samples_split=1)),
            ('ABC', ensemble.AdaBoostClassifier())],
            voting='soft')
    elif modelname == 'VC2':
        # Soft vote of logistic regression, random forest and extra trees
        return ensemble.VotingClassifier(estimators=[
            ('LR', linear_model.LogisticRegression(C=0.1, random_state=1)),
            ('RFC', ensemble.RandomForestClassifier(
                max_depth=None, n_estimators=10, random_state=0,
                min_samples_split=1)),
            ('ETC', ensemble.ExtraTreesClassifier(
                max_depth=None, max_features=5, n_estimators=10,
                random_state=0, min_samples_split=1))],
            voting='soft')

    else:
        # Report the requested code; formatting the undefined name "model"
        # here would raise NameError and mask the intended ConfigError.
        raise ConfigError("Can't find the model: {}".format(modelname))

test_voting_classifier.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def test_predict_proba_on_toy_problem():
    """Calculate predicted probabilities on toy dataset."""
    members = [
        ('lr', LogisticRegression(random_state=123)),
        ('rf', RandomForestClassifier(random_state=123)),
        ('gnb', GaussianNB()),
    ]
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    # Reference probabilities of the three members, in the order above.
    clf1_res = np.array([[0.59790391, 0.40209609],
                         [0.57622162, 0.42377838],
                         [0.50728456, 0.49271544],
                         [0.40241774, 0.59758226]])

    clf2_res = np.array([[0.8, 0.2],
                         [0.8, 0.2],
                         [0.2, 0.8],
                         [0.3, 0.7]])

    clf3_res = np.array([[0.9985082, 0.0014918],
                         [0.99845843, 0.00154157],
                         [0., 1.],
                         [0., 1.]])

    eclf = VotingClassifier(estimators=members,
                            voting='soft',
                            weights=[2, 1, 1])
    eclf_res = eclf.fit(X, y).predict_proba(X)

    # Weighted mean with weights 2:1:1, checked at one (row, column) cell
    # per sample.
    for row, col in ((0, 0), (1, 1), (2, 1), (3, 1)):
        expected = (2*clf1_res[row][col] + clf2_res[row][col]
                    + clf3_res[row][col]) / 4
        assert_almost_equal(expected, eclf_res[row][col], decimal=1)

    # With hard voting, predict_proba must raise AttributeError.
    try:
        eclf = VotingClassifier(estimators=members, voting='hard')
        eclf.fit(X, y).predict_proba(X)

    except AttributeError:
        pass
    else:
        raise AssertionError('AttributeError for voting == "hard"'
                             ' and with predict_proba not raised')