def passive_aggressive_train(self):
    '''Trains passive aggressive classifier
    '''
    self._clf = PassiveAggressiveClassifier(n_iter=50, C=0.2, n_jobs=-1, random_state=0)
    self._clf.fit(self._term_doc_matrix._X, self._term_doc_matrix._y)
    y_dist = self._clf.decision_function(self._term_doc_matrix._X)
    pos_ecdf = ECDF(y_dist[y_dist >= 0])
    neg_ecdf = ECDF(y_dist[y_dist <= 0])

    def proba_function(distance_from_hyperplane):
        if distance_from_hyperplane > 0:
            return pos_ecdf(distance_from_hyperplane) / 2. + 0.5
        elif distance_from_hyperplane < 0:
            # use the ECDF of the *negative* distances here; pos_ecdf would
            # collapse every negative distance to a probability of 0 and
            # leave neg_ecdf as dead code
            return neg_ecdf(distance_from_hyperplane) / 2.
        return 0.5

    self._proba = proba_function
    return self
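# A minimal standalone sketch (not from the library above; the sample
# distances are made up) of what the ECDF trick in proba_function does:
# positive distances map into [0.5, 1], negative ones into [0, 0.5], and
# the mapping is monotone in the distance from the hyperplane.
import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF

y_dist = np.array([-2.0, -0.5, -0.1, 0.3, 0.9, 1.7])  # hypothetical distances
pos_ecdf = ECDF(y_dist[y_dist >= 0])
neg_ecdf = ECDF(y_dist[y_dist <= 0])

def to_proba(d):
    if d > 0:
        return pos_ecdf(d) / 2. + 0.5
    elif d < 0:
        return neg_ecdf(d) / 2.
    return 0.5

print([float(to_proba(d)) for d in y_dist])  # increases with d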
# Python PassiveAggressiveClassifier() example source code
def test_main(self):
    categories, documents = get_docs_categories()
    clean_function = lambda text: '' if text.startswith('[') else text
    entity_types = set(['GPE'])
    term_doc_mat = (
        TermDocMatrixFactory(
            category_text_iter=zip(categories, documents),
            clean_function=clean_function,
            nlp=_testing_nlp,
            feats_from_spacy_doc=FeatsFromSpacyDoc(entity_types_to_censor=entity_types)
        ).build()
    )
    clf = PassiveAggressiveClassifier(n_iter=5, C=0.5, n_jobs=-1, random_state=0)
    fdc = FeatsFromDoc(term_doc_mat._term_idx_store,
                       clean_function=clean_function,
                       feats_from_spacy_doc=FeatsFromSpacyDoc(
                           entity_types_to_censor=entity_types)).set_nlp(_testing_nlp)
    tfidf = TfidfTransformer(norm='l1')
    X = tfidf.fit_transform(term_doc_mat._X)
    clf.fit(X, term_doc_mat._y)
    X_to_predict = fdc.feats_from_doc('Did sometimes march UNKNOWNWORD')
    pred = clf.predict(tfidf.transform(X_to_predict))
    # apply the same tf-idf transform before scoring; the classifier was
    # fit on transformed features
    dec = clf.decision_function(tfidf.transform(X_to_predict))
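# A self-contained sketch of the same pattern on toy data (the texts and
# labels are made up, and max_iter is used in place of the older n_iter
# argument, assuming a recent scikit-learn): fit on l1-normalized tf-idf
# features, then push unseen text through the *same* vectorizer.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier

texts = ['soldiers march in ranks', 'the senate debates the bill',
         'troops march at dawn', 'the bill passes the senate']
labels = ['war', 'politics', 'war', 'politics']

vec = TfidfVectorizer(norm='l1')
X_toy = vec.fit_transform(texts)
clf = PassiveAggressiveClassifier(C=0.5, max_iter=50, random_state=0)
clf.fit(X_toy, labels)
print(clf.predict(vec.transform(['did sometimes march'])))  # likely 'war'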
def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False)
    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)
    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))
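# Why batch and incremental learning can agree here (a sketch using only
# public scikit-learn API; it should print True, though exact equality is
# an assumption about this configuration): with shuffle=False and a single
# epoch, fit() on a slice of the data performs the same updates as one
# partial_fit() pass over it.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import PassiveAggressiveClassifier

Xs, ys = make_classification(n_samples=30, n_features=1, n_informative=1,
                             n_redundant=0, n_clusters_per_class=1,
                             random_state=0)
a = PassiveAggressiveClassifier(max_iter=1, shuffle=False, tol=None,
                                random_state=0).fit(Xs[:20], ys[:20])
b = PassiveAggressiveClassifier(max_iter=1, shuffle=False, tol=None,
                                random_state=0)
b.partial_fit(Xs[:20], ys[:20], classes=np.unique(ys))
print(np.allclose(a.coef_, b.coef_))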
def test_class_weights():
    # Test class weights.
    X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                   [1.0, 1.0], [1.0, 0.0]])
    y2 = [1, 1, 1, -1, -1]
    clf = PassiveAggressiveClassifier(C=0.1, n_iter=100, class_weight=None,
                                      random_state=100)
    clf.fit(X2, y2)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))
    # we give a small weight to class 1
    clf = PassiveAggressiveClassifier(C=0.1, n_iter=100,
                                      class_weight={1: 0.001},
                                      random_state=100)
    clf.fit(X2, y2)
    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))
def test_equal_class_weight():
    X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
    y2 = [0, 0, 1, 1]
    clf = PassiveAggressiveClassifier(C=0.1, n_iter=1000, class_weight=None)
    clf.fit(X2, y2)
    # Already balanced, so "balanced" weights should have no effect
    clf_balanced = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
                                               class_weight="balanced")
    clf_balanced.fit(X2, y2)
    clf_weighted = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
                                               class_weight={0: 0.5, 1: 0.5})
    clf_weighted.fit(X2, y2)
    # should be similar up to some epsilon due to learning rate schedule
    assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)
    assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2)
def test_basic(self, single_chunk_classification):
    X, y = single_chunk_classification
    a = lm.PartialPassiveAggressiveClassifier(classes=[0, 1],
                                              random_state=0,
                                              max_iter=100, tol=1e-3)
    b = lm_.PassiveAggressiveClassifier(random_state=0, max_iter=100,
                                        tol=1e-3)
    a.fit(X, y)
    b.partial_fit(X, y, classes=[0, 1])
    assert_estimator_equal(a, b, exclude=['loss_function_'])
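# The same out-of-core idea with plain scikit-learn, as a sketch: stream
# chunks through partial_fit, passing the full class list so the first
# chunk need not contain every label.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import PassiveAggressiveClassifier

X_stream, y_stream = make_classification(n_samples=200, random_state=0)
classes = np.unique(y_stream)
clf = PassiveAggressiveClassifier(random_state=0)
for start in range(0, len(X_stream), 50):  # four chunks of 50 rows each
    clf.partial_fit(X_stream[start:start + 50], y_stream[start:start + 50],
                    classes=classes)
print(clf.score(X_stream, y_stream))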
def run_cat(filename, modelname, fileout, embeddings, new_run=True, run_parse=True,
            model_type='logreg', C=10.0,
            alpha=1.0, cutoff=0.50, n_iter=1):
    # pull relevant data and run parsing and classification
    df = pd.read_csv(filename)
    if len(df.columns) == 2:  # make sure columns have the right names
        df.columns = ['raw', 'amount']
    if new_run:  # initialize the model
        if model_type == 'logreg':
            model = linear_model.SGDClassifier(loss='log', warm_start=True,
                                               n_iter=n_iter, alpha=alpha)
        elif model_type == 'passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C, warm_start=True)
        elif model_type == 'naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else:  # load a saved, pre-trained model
        modelFileLoad = open(modelname, 'rb')
        model = pickle.load(modelFileLoad)
    fileCities = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(fileCities)
    df = cat_df(df, model, us_cities, embeddings, new_run, run_parse, cutoff=cutoff,
                model_type=model_type)
    df.to_csv(fileout, index=False)
    # save the trained model so a later run can resume from it
    modelFileSave = open(modelname, 'wb')
    pickle.dump(model, modelFileSave)
    modelFileSave.close()
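# A minimal sketch of the save/load pattern run_cat relies on (the file
# name is made up): pickle the model after training so a later run with
# new_run=False can reload it and, with warm_start=True, keep training.
import pickle
from sklearn.linear_model import PassiveAggressiveClassifier

model = PassiveAggressiveClassifier(C=10.0, warm_start=True)
with open('pa_model.pickle', 'wb') as modelFileSave:  # hypothetical path
    pickle.dump(model, modelFileSave)
with open('pa_model.pickle', 'rb') as modelFileLoad:
    model = pickle.load(modelFileLoad)  # resumes from the saved state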
# ------ testing functions
def generate_base_classification():
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
    models = [
        #(LinearSVC, params('C', 'loss')),
        # (NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),
        # we'd like to extend what params() returns with loss functions that
        # weren't defined in the method; note that list.extend() and
        # dict.update() return None, so the merge must build a new dict
        # rather than mutate in place inside the tuple
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),
        (KNeighborsClassifier, dict(params('n_neighbors', 'leaf_size', 'p'),
                                    algorithm=['auto', 'brute', 'kd_tree', 'ball_tree'])),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha'))
    ]
    return models
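# One way the (estimator, grid) pairs above are typically consumed (a
# sketch; it assumes params() returns a dict of hyperparameter lists that
# is usable directly as a GridSearchCV param_grid):
from sklearn.model_selection import GridSearchCV

def fit_all(models, X, y):
    fitted = []
    for estimator_cls, grid in models:
        search = GridSearchCV(estimator_cls(), grid or {}, cv=3)
        fitted.append(search.fit(X, y))
    return fitted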
def test_classifier_accuracy():
    for data in (X, X_csr):
        for fit_intercept in (True, False):
            clf = PassiveAggressiveClassifier(C=1.0, n_iter=30,
                                              fit_intercept=fit_intercept,
                                              random_state=0)
            clf.fit(data, y)
            score = clf.score(data, y)
            assert_greater(score, 0.79)
def test_classifier_partial_fit():
    classes = np.unique(y)
    for data in (X, X_csr):
        clf = PassiveAggressiveClassifier(C=1.0,
                                          fit_intercept=True,
                                          random_state=0)
        for t in range(30):
            clf.partial_fit(data, y, classes)
        score = clf.score(data, y)
        assert_greater(score, 0.79)
def test_classifier_refit():
    # Classifier can be retrained on different labels and features.
    clf = PassiveAggressiveClassifier().fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))
    clf.fit(X[:, :-1], iris.target_names[y])
    assert_array_equal(clf.classes_, iris.target_names)
def test_classifier_undefined_methods():
    clf = PassiveAggressiveClassifier()
    for meth in ("predict_proba", "predict_log_proba", "transform"):
        assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
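# Because predict_proba is unavailable (see the test above),
# decision_function is the supported way to get graded outputs from this
# model; a sketch on made-up points:
import numpy as np
from sklearn.linear_model import PassiveAggressiveClassifier

X2 = np.array([[0.0, 0.0], [0.5, 0.5], [2.0, 2.0], [3.0, 3.0]])
y2 = np.array([0, 0, 1, 1])
clf = PassiveAggressiveClassifier(random_state=0).fit(X2, y2)
print(clf.decision_function(X2))  # signed distances, not probabilities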
def test_partial_fit_weight_class_balanced():
    # partial_fit with class_weight='balanced' not supported
    clf = PassiveAggressiveClassifier(class_weight="balanced")
    assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y))
def test_wrong_class_weight_label():
    # ValueError due to wrong class_weight label.
    X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                   [1.0, 1.0], [1.0, 0.0]])
    y2 = [1, 1, 1, -1, -1]
    clf = PassiveAggressiveClassifier(class_weight={0: 0.5})
    assert_raises(ValueError, clf.fit, X2, y2)
def test_partial_fit():
    est = PassiveAggressiveClassifier(random_state=0, shuffle=False)
    transformer = SelectFromModel(estimator=est)
    transformer.partial_fit(data, y,
                            classes=np.unique(y))
    old_model = transformer.estimator_
    transformer.partial_fit(data, y,
                            classes=np.unique(y))
    new_model = transformer.estimator_
    assert_true(old_model is new_model)
    X_transform = transformer.transform(data)
    transformer.fit(np.vstack((data, data)), np.concatenate((y, y)))
    assert_array_equal(X_transform, transformer.transform(data))
def test_warm_start():
    est = PassiveAggressiveClassifier(warm_start=True, random_state=0)
    transformer = SelectFromModel(estimator=est)
    transformer.fit(data, y)
    old_model = transformer.estimator_
    transformer.fit(data, y)
    new_model = transformer.estimator_
    assert_true(old_model is new_model)
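# Tying the last two tests together, a sketch: SelectFromModel keeps one
# underlying estimator across repeated fits, and transform() retains the
# columns whose |coef_| clears the threshold (the mean by default here).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import PassiveAggressiveClassifier

X_sel, y_sel = make_classification(n_samples=100, n_features=10,
                                   random_state=0)
sfm = SelectFromModel(PassiveAggressiveClassifier(random_state=0))
sfm.fit(X_sel, y_sel)
print(X_sel.shape, '->', sfm.transform(X_sel).shape)  # fewer columns kept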