python类SVC的实例源码

test_feature_importances.py 文件源码 项目:triage 作者: dssg 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def trained_models():
    """Fit a handful of classifiers on the breast-cancer dataset.

    The data is split 70/30 with a fixed seed and only the training part is
    used. The estimators are fitted one after another in a fixed order
    (random forest, logistic regression, linear SVC, default SVC, dummy).

    :return: dict mapping a short label to the fitted estimator
    """
    cancer = datasets.load_breast_cancer()
    features_train, _, labels_train, _ = train_test_split(
        cancer.data, cancer.target, test_size=0.3, random_state=12345)

    forest = RandomForestClassifier()
    logistic = LogisticRegression()
    linear_svc = SVC(kernel='linear')
    default_svc = SVC()
    baseline = DummyClassifier()

    # Fit sequentially, keeping the same order as the estimators are listed.
    for estimator in (forest, logistic, linear_svc, default_svc, baseline):
        estimator.fit(features_train, labels_train)

    return {'RF': forest, 'LR': logistic,
            'SVC_w_linear_kernel': linear_svc,
            'Dummy': baseline, 'SVC_wo_linear_kernel': default_svc}
classification.py 文件源码 项目:sef 作者: passalis 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def evaluate_svm(train_data, train_labels, test_data, test_labels, n_jobs=-1):
    """
    Scores a representation with a linear-kernel SVM.

    Features are min-max scaled (the scaler is fitted on the training data
    only), then the C parameter is chosen by a 3-fold cross-validated grid
    search over powers of ten from 1e-4 to 1e5.
    :param train_data: training feature matrix
    :param train_labels: training labels
    :param test_data: test feature matrix
    :param test_labels: test labels
    :param n_jobs: forwarded to GridSearchCV
    :return: the score of the selected model on the scaled test data
    """
    min_max = MinMaxScaler()
    scaled_train = min_max.fit_transform(train_data)
    scaled_test = min_max.transform(test_data)

    search_space = {
        'kernel': ['linear'],
        'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000],
    }
    search = grid_search.GridSearchCV(
        svm.SVC(max_iter=10000), search_space, n_jobs=n_jobs, cv=3)
    search.fit(scaled_train, train_labels)

    return search.score(scaled_test, test_labels)
models.py 文件源码 项目:johnson-county-ddj-public 作者: dssg 项目源码 文件源码 阅读 82 收藏 0 点赞 0 评论 0
def get_feature_importance(self, clf, model_name):
    """Return the feature-importance values exposed by a fitted model.

    :param clf: fitted estimator
    :param model_name: key identifying the estimator type; must be one of
        the names in the table below, otherwise ``KeyError`` is raised
    :return: ``list(clf.feature_importances_)`` for tree/ensemble models,
        ``clf.coef_.tolist()`` for coefficient-based models, or ``None`` when
        the model has no such attribute
    """
    clfs = {'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef'}

    source = clfs[model_name]
    if source == 'feature_importances':
        return list(clf.feature_importances_)
    if source == 'coef':
        # An SVC only exposes coef_ with a linear kernel; when the kernel has
        # been overridden the attribute lookup fails, so report "no
        # importances" instead of crashing after the model has been trained.
        coefficients = getattr(clf, 'coef_', None)
        if coefficients is None:
            return None
        return list(coefficients.tolist())
    return None
classifier.py 文件源码 项目:rltk 作者: usc-isi-i2 项目源码 文件源码 阅读 41 收藏 0 点赞 0 评论 0
def get_classifier_class(class_name):
    """Look up a classifier class by its short configuration name.

    :param class_name: one of the keys of the registry below
    :return: the classifier class (not an instance)
    :raises ValueError: if ``class_name`` is not a known key
    """
    registry = dict(
        svm=SVC,
        k_neighbors=KNeighborsClassifier,
        gaussian_process=GaussianProcessClassifier,
        decision_tree=DecisionTreeClassifier,
        random_forest=RandomForestClassifier,
        ada_boost=AdaBoostClassifier,
        mlp=MLPClassifier,
        gaussian_naive_bayes=GaussianNB,
        quadratic_discriminant_analysis=QuadraticDiscriminantAnalysis,
    )

    classifier_cls = registry.get(class_name)
    if classifier_cls is None:
        raise ValueError('No such classifier')
    return classifier_cls
models.py 文件源码 项目:johnson-county-ddj-public 作者: dssg 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def define_model(self, model, parameters, n_cores = 0):
    """Build the estimator registered under ``model`` and apply overrides.

    :param model: key naming the estimator type (see the table below)
    :param parameters: dict of keyword arguments passed to ``set_params``
    :param n_cores: accepted but not referenced by this method
    :return: the configured, unfitted estimator
    :raises ConfigError: if ``model`` is not a supported key
    """
    # Every candidate is instantiated with its baseline settings; only the
    # requested one is returned.
    available = {
        'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
        'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
        'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
        'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'GaussianNB': GaussianNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
        'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
        'linear.SVC': svm.LinearSVC(),
    }

    if model in available:
        estimator = available[model]
        estimator.set_params(**parameters)
        return estimator

    raise ConfigError("Unsupported model {}".format(model))
models_classification.py 文件源码 项目:easyML 作者: aarshayj 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def __init__(
        self, data_block, predictors=None, cv_folds=10,
        scoring_metric='accuracy', additional_display_metrics=None):
    """Set up an SVC-backed classification model.

    :param data_block: forwarded to ``base_classification.__init__``
    :param predictors: list of predictors; a fresh empty list when omitted
    :param cv_folds: forwarded to the base class
    :param scoring_metric: forwarded to the base class
    :param additional_display_metrics: list of extra metrics; a fresh empty
        list when omitted
    """
    # Use None sentinels so that separate instances never share (and
    # possibly mutate) one default list object.
    if predictors is None:
        predictors = []
    if additional_display_metrics is None:
        additional_display_metrics = []

    base_classification.__init__(
        self, alg=SVC(), data_block=data_block, predictors=predictors,
        cv_folds=cv_folds, scoring_metric=scoring_metric,
        additional_display_metrics=additional_display_metrics
        )

    self.model_output = pd.Series(self.default_parameters)
    self.model_output['Coefficients'] = "-"

    # Set parameters to default values:
    self.set_parameters(set_default=True)

    # Flag probabilities as unavailable when the SVC's 'probability'
    # parameter is falsy:
    if not self.alg.get_params()['probability']:
        self.probabilities_available = False
quiz5.py 文件源码 项目:ML_NTU 作者: LeoTsui 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def quiz15():
    """Plot ||w|| of a linear SVM against C for the "digit 0 vs rest" task.

    Reads ``features.train``, fits one linear-kernel SVC per value of
    C = 10**e for e in -6, -4, -2, 0, 2, prints each norm and saves a
    semilog-x plot to ``h5_q15.png``.
    """
    X, Y, _ = read_file("features.train")
    Y_0 = (Y == 0).astype(int)  # 1 where the label is 0, else 0

    penalties = []
    weight_norms = []
    for exponent in (-6, -4, -2, 0, 2):
        penalty = 10 ** exponent
        penalties.append(penalty)

        model = svm.SVC(C=penalty, kernel='linear', shrinking=False)
        model.fit(X, Y_0)

        weight_norm = np.linalg.norm(model.coef_.flatten(), ord=2)
        weight_norms.append(weight_norm)
        print("C = ", penalty, '    norm(w) =', weight_norm)

    plt.semilogx(penalties, weight_norms)
    plt.savefig("h5_q15.png", dpi=300)
svm.py 文件源码 项目:MultimodalAutoencoder 作者: natashamjaques 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def train_and_predict(self, param_dict, predict_on='val'):
    """Fits a fresh SVC with the given settings and predicts on one dataset.

    Args:
        param_dict: A dictionary providing the 'C', 'kernel' and 'beta'
            settings; 'beta' is passed to the SVC as its gamma.
        predict_on: Pass 'test' (lowercase) to predict on the test set; any
            other value selects the validation set.

    Returns: The predicted Y labels.
    """
    loader = self.data_loader
    predict_X = loader.test_X if predict_on == 'test' else loader.val_X

    self.model = SVC(
        C=param_dict['C'],
        kernel=param_dict['kernel'],
        gamma=param_dict['beta'],
    )
    self.model.fit(loader.train_X, loader.train_Y)

    return self.predict_on_data(predict_X)
classification.py 文件源码 项目:brainiak 作者: brainiak 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Train/test example that aggregates the kernel matrix to save memory.

    The first ``num_epochs_per_subj`` entries of ``raw_data``/``labels`` are
    moved to the end and used as the test portion; everything else is
    training data. Predictions, decision values and the score are printed and
    the accuracy is logged.
    """
    classifier = Classifier(
        svm.SVC(kernel='precomputed', shrinking=False, C=1),
        num_processed_voxels=1000,
        epochs_per_subj=num_epochs_per_subj)

    # Rotate the leading block of epochs to the back of both sequences.
    held_out = num_epochs_per_subj
    rearranged_data = raw_data[held_out:] + raw_data[:held_out]
    rearranged_labels = labels[held_out:] + labels[:held_out]

    classifier.fit(list(zip(rearranged_data, rearranged_data)),
                   rearranged_labels,
                   num_training_samples=num_epochs_per_subj * (num_subjects - 1))

    # No test data is passed: fit() was given it together with the training data.
    predictions = classifier.predict()
    print(predictions)
    print(classifier.decision_function())

    test_labels = labels[:held_out]
    # hamming() yields the fraction of mismatches; scale to a count.
    num_wrong = hamming(predictions, np.asanyarray(test_labels)) * num_epochs_per_subj
    num_right = num_epochs_per_subj - num_wrong
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_right, num_epochs_per_subj,
         num_right * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is already in
    print(classifier.score(None, test_labels))
classification.py 文件源码 项目:brainiak 作者: brainiak 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Cross-validate the classifier with ``num_subjects`` unshuffled folds.

    NOTE (from the original author): this method does not work for
    sklearn.svm.SVC with precomputed kernel when the kernel matrix is
    computed in portions; also, it only works for self-correlation, i.e.
    correlation between the same data matrix.
    """
    # no shrinking, C=1
    classifier = Classifier(
        svm.SVC(kernel='precomputed', shrinking=False, C=1),
        epochs_per_subj=num_epochs_per_subj)

    # One fold per subject, no shuffling (intended by the original author as
    # leave-one-subject-out cross validation).
    folds = model_selection.StratifiedKFold(n_splits=num_subjects,
                                            shuffle=False)
    self_pairs = list(zip(raw_data, raw_data))
    scores = model_selection.cross_val_score(classifier, self_pairs,
                                             y=labels,
                                             cv=folds)
    print(scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(scores)
    )
classification.py 文件源码 项目:brainiak 作者: brainiak 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    """Train/test example correlating two different data components.

    The first ``num_epochs_per_subj * (num_subjects - 1)`` samples are used
    for training and the remainder for testing. Predictions, decision values
    and the score are printed and the accuracy is logged.
    """
    classifier = Classifier(
        svm.SVC(kernel='precomputed', shrinking=False, C=1),
        epochs_per_subj=num_epochs_per_subj)

    split = num_epochs_per_subj * (num_subjects - 1)
    training_pairs = list(zip(raw_data[:split], raw_data2[:split]))
    classifier.fit(training_pairs, labels[:split])

    X = list(zip(raw_data[split:], raw_data2[split:]))
    predictions = classifier.predict(X)
    print(predictions)
    print(classifier.decision_function(X))

    test_labels = labels[split:]
    # hamming() yields the fraction of mismatches; scale to a count.
    num_wrong = hamming(predictions, np.asanyarray(test_labels)) * num_epochs_per_subj
    num_right = num_epochs_per_subj - num_wrong
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_right, num_epochs_per_subj,
         num_right * 1.0 / num_epochs_per_subj)
    )
    print(classifier.score(X, test_labels))
classification.py 文件源码 项目:brainiak 作者: brainiak 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def example_of_correlating_two_components_aggregating_sim_matrix(raw_data, raw_data2, labels,
                                                                 num_subjects, num_epochs_per_subj):
    """Two-component example that aggregates the kernel matrix to save memory.

    All samples are handed to ``fit`` at once; the first
    ``num_epochs_per_subj * (num_subjects - 1)`` of them are declared as
    training samples and the remaining labels are used for evaluation.
    """
    classifier = Classifier(
        svm.SVC(kernel='precomputed', shrinking=False, C=1),
        num_processed_voxels=1000,
        epochs_per_subj=num_epochs_per_subj)

    split = num_epochs_per_subj * (num_subjects - 1)
    classifier.fit(list(zip(raw_data, raw_data2)), labels,
                   num_training_samples=split)

    # No test data is passed: fit() was given it together with the training data.
    predictions = classifier.predict()
    print(predictions)
    print(classifier.decision_function())

    test_labels = labels[split:]
    # hamming() yields the fraction of mismatches; scale to a count.
    num_wrong = hamming(predictions, np.asanyarray(test_labels)) * num_epochs_per_subj
    num_right = num_epochs_per_subj - num_wrong
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_right, num_epochs_per_subj,
         num_right * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is already in
    print(classifier.score(None, test_labels))

# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy
test_searchgrid.py 文件源码 项目:searchgrid 作者: jnothman 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_build_param_grid_set_estimator():
    """build_param_grid expands alternative estimators set on a pipeline step.

    Estimators that carry their own grid get one entry each; estimators
    without a grid are grouped into a single shared entry.
    """
    linear_svc = SVC()
    logistic = LogisticRegression()
    poly_svc = SVC()
    sgd = SGDClassifier()

    pipeline = Pipeline([('sel', set_grid(SelectKBest(), k=[2, 3])),
                         ('clf', None)])
    candidates = [
        set_grid(linear_svc, kernel=['linear']),
        logistic,
        set_grid(poly_svc, kernel=['poly'], degree=[2, 3]),
        sgd,
    ]
    estimator = set_grid(pipeline, clf=candidates)

    expected = [
        {'clf': [linear_svc], 'clf__kernel': ['linear'], 'sel__k': [2, 3]},
        {'clf': [poly_svc], 'clf__kernel': ['poly'],
         'clf__degree': [2, 3], 'sel__k': [2, 3]},
        {'clf': [logistic, sgd], 'sel__k': [2, 3]},
    ]
    assert build_param_grid(estimator) == expected
test_searchgrid.py 文件源码 项目:searchgrid 作者: jnothman 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_make_grid_search():
    """make_grid_search builds searches for one estimator or a list of them."""
    X, y = load_iris(return_X_y=True)
    lr = LogisticRegression()
    svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])

    searches = [
        make_grid_search(lr, cv=5),  # empty grid
        make_grid_search(svc, cv=5),
        make_grid_search([lr, svc], cv=5),
    ]
    for search, expected_count in zip(searches, (1, 2, 3)):
        search.fit(X, y)
        assert search.cv == 5
        assert len(search.cv_results_['params']) == expected_count

    # In the combined search the estimator itself is the 'root' parameter.
    combined = searches[2].cv_results_
    svc_mask = combined['param_root'] == svc
    assert svc_mask.sum() == 2
    assert combined['param_root__degree'][svc_mask].tolist() == [2, 3]
    assert combined['param_root'][~svc_mask].tolist() == [lr]
LIEGE.py 文件源码 项目:entity-linker 作者: seucs 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def train(train_dataTables, human_marks):
    """Train the module-level ``classifier`` on candidate-entity samples.

    Each candidate of each mention whose ``cid`` is not -1 contributes one
    sample ``[candidate.popular, mention.getSR(index, entities)]`` with
    target 1 when the candidate id equals the human-marked id for that cell
    and 0 otherwise.

    :param train_dataTables: iterable of tables indexable as table[i][j]
    :param human_marks: human_marks[table][row][col]['id'] is the true id
    """
    global classifier
    samples = []
    target = []

    for table_index, dataTable in enumerate(train_dataTables):
        for row in xrange(dataTable.row):
            for col in xrange(dataTable.col):
                mention = dataTable[row][col]
                if mention.cid == -1:
                    continue
                # These two results are not used below; the calls are kept
                # because it is not visible here whether they have side effects.
                eids = dataTable.get_eids(row, col)
                words = dataTable.get_words(row, col)
                cell_entities = dataTable.get_entities(row, col)
                true_id = human_marks[table_index][row][col]['id']
                for rank, candidate in enumerate(mention.candidates):
                    prior = candidate.popular
                    relatedness = mention.getSR(rank, cell_entities)
                    is_match = int(true_id == candidate.id)
                    samples.append([prior, relatedness])
                    target.append(is_match)

    from sklearn import svm
    classifier = svm.SVC(probability=True)
    classifier.fit(samples, target)
models.py 文件源码 项目:johnson-county-ddj-public 作者: dssg 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def run(self):
    """Train the configured model, score the test set and package results.

    :return: tuple ``(result_dictionary, clf)`` where ``clf`` is the fitted
        estimator and the dictionary holds ids, predictions, scores, the
        configuration and the feature importances
    """
    training_x, training_y, training_ids = self.get_training_data()
    test_x, test_y, test_ids = self.get_test_data()

    clf = self.define_model(self.model_name, self.model_params)
    clf.fit(training_x, training_y)
    res_predict = clf.predict(test_x)

    # These models are scored with decision_function; all others with the
    # second column of predict_proba.
    margin_sgd = (self.model_name == "SGDClassifier"
                  and clf.loss in ("hinge", "perceptron"))
    if margin_sgd or self.model_name == "linear.SVC":
        scores = list(clf.decision_function(test_x))
    else:
        scores = list(clf.predict_proba(test_x)[:, 1])

    result_dictionary = {
        'training_ids': training_ids,
        'predictions_test_y': list(res_predict),
        'prob_prediction_test_y': scores,
        'test_y': list(test_y),
        'test_ids': list(test_ids),
        'model_name': self.model_name,
        'model_params': self.model_params,
        'label': self.label,
        'feature_columns_used': self.cols_to_use,
        'config': self.config,
        'feature_importance': self.get_feature_importance(clf, self.model_name),
        'columned_used_for_feat_importance': list(training_x.columns.values),
    }
    return result_dictionary, clf
gater_seq.py 文件源码 项目:MixtureOfExperts 作者: krishnakalyan3 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def svc_model(self, X, y, x_test, y_test, x_val, y_val, i, j):
    """Fit an RBF SVC on shuffled training data and log its error rates.

    Appends ``[i, train_error, val_error, test_error]`` (errors in percent)
    to ``self.warn_log``. ``j`` is accepted but not used here.

    :return: the fitted SVC
    """
    X, y = shuffle(X, y, random_state=self.SEED)
    model = SVC(C=self.C, kernel='rbf', gamma=self.gamma,
                cache_size=self.cache_size, verbose=0,
                random_state=self.SEED).fit(X, y)

    # Predict in the order train, validation, test.
    predictions = [model.predict(features) for features in (X, x_val, x_test)]
    errors = [(1 - accuracy_score(truth, predicted)) * 100
              for truth, predicted in zip((y, y_val, y_test), predictions)]

    self.warn_log.append([i] + errors)

    return model
ClassificationSVM.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __init__(self, isTrain, isOutlierRemoval=0):
    """
    Initialise the base class, preprocess the data and create the candidate
    SVM estimators (default, linear, RBF, polynomial and LinearSVC).

    Background on the two linear variants: ``LinearSVC()`` and
    ``SVC(kernel='linear')`` yield slightly different decision boundaries
    because
    - ``LinearSVC`` minimizes the squared hinge loss while ``SVC`` minimizes
      the regular hinge loss;
    - ``LinearSVC`` uses the One-vs-All (One-vs-Rest) multiclass reduction
      while ``SVC`` uses the One-vs-One multiclass reduction.

    :param isTrain: forwarded to the base class
    :param isOutlierRemoval: forwarded to the base class
    """
    super(ClassificationSVM, self).__init__(isTrain, isOutlierRemoval)

    self.dataPreprocessing()

    # Classifier with library defaults.
    self.clf = svm.SVC()

    # All remaining estimators share the same regularization strength.
    penalty = 1.0
    self.svc = svm.SVC(kernel='linear', C=penalty, max_iter=100000)
    self.rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=penalty)
    self.poly_svc = svm.SVC(kernel='poly', coef0=1, degree=3, C=penalty)
    self.lin_svc = svm.LinearSVC(C=penalty)
ClassificationSVM.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
    """Grid-search SVC hyper-parameters and print a report.

    Runs a 5-fold, precision-weighted grid search over RBF, polynomial and
    linear kernels on ``self.X_train`` / ``self.y_train``, then prints the
    best parameters, the per-setting scores and a classification report for
    ``self.X_test`` / ``self.y_test``.
    """
    # Set the parameters by cross-validation
    tuned_parameters = [{'kernel': ['rbf'],
                         'gamma': np.logspace(-4, 3, 30),
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                        {'kernel': ['poly'],
                         'degree': [1, 2, 3, 4],
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                         'coef0': np.logspace(-4, 3, 30)},
                        {'kernel': ['linear'],
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

    clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    # Single-argument print(...) calls produce the same output under
    # Python 2 and are also valid Python 3.
    print("Best parameters set found on development set:\n")
    print(clf.best_params_)

    print("Grid scores on development set:\n")
    # NOTE(review): grid_scores_ exists only in older scikit-learn releases;
    # confirm the installed version before relying on this loop.
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

    print("Detailed classification report:\n")
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print(classification_report(y_true, y_pred))
utils.py 文件源码 项目:LINE 作者: VahidooX 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def svm_classify(X, label, split_ratios, C):
    """
    Trains a linear-kernel SVM and reports accuracy on three splits.

    The data is cut positionally (no shuffling): the first
    ``split_ratios[0]`` fraction is the training set, the next
    ``split_ratios[1]`` fraction the validation set, the rest the test set.
    ``C`` is the SVM penalty factor.
    Returns ``[train_acc, valid_acc, test_acc]``.
    """
    total = len(X)
    train_end = int(total*split_ratios[0])
    valid_end = train_end + int(total*split_ratios[1])

    partitions = [
        (X[:train_end], label[:train_end]),
        (X[train_end:valid_end], label[train_end:valid_end]),
        (X[valid_end:], label[valid_end:]),
    ]

    print('training SVM...')
    model = svm.SVC(C=C, kernel='linear')
    train_data, train_label = partitions[0]
    model.fit(train_data, train_label.ravel())

    # Evaluate in the order train, validation, test.
    return [accuracy_score(truth, model.predict(data))
            for data, truth in partitions]
classification.py 文件源码 项目:CAAPR 作者: Stargrazer82301 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def setup(self):

    """
    Runs the base-class setup, creates the support vector classifier and
    derives the collection and classification paths for the configured mode.
    :return:
    """

    # Base-class setup comes first
    super(Classifier, self).setup()

    # Support vector classification with fixed gamma and C
    self.vector_classifier = svm.SVC(gamma=0.001, C=100.)

    # Collection directory for the current mode, with its 'yes' and 'no'
    # saturation sub-directories
    collection_mode_path = os.path.join(self.collection_user_path, self.config.mode)
    self.yes_path, self.no_path = [
        os.path.join(collection_mode_path, answer) for answer in ("yes", "no")
    ]

    # Classification directory for the current mode
    self.classification_mode_path = os.path.join(self.classification_user_path, self.config.mode)

    # -----------------------------------------------------------------
classification.py 文件源码 项目:CAAPR 作者: Stargrazer82301 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def setup(self):

    """
    Runs the base-class setup, creates the support vector classifier and
    derives the collection and classification paths for the configured mode.
    :return:
    """

    # Base-class setup comes first
    super(Classifier, self).setup()

    # Support vector classification with fixed gamma and C
    self.vector_classifier = svm.SVC(gamma=0.001, C=100.)

    # Collection directory for the current mode, with its 'yes' and 'no'
    # saturation sub-directories
    collection_mode_path = os.path.join(self.collection_user_path, self.config.mode)
    self.yes_path, self.no_path = [
        os.path.join(collection_mode_path, answer) for answer in ("yes", "no")
    ]

    # Classification directory for the current mode
    self.classification_mode_path = os.path.join(self.classification_user_path, self.config.mode)

    # -----------------------------------------------------------------
test_model_selection.py 文件源码 项目:dask-searchcv 作者: dask 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_visualize():
    """visualize() writes a PNG for a fitted search and refuses an unfitted one."""
    pytest.importorskip('graphviz')

    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    estimator = SVC(random_state=0)
    param_grid = {'C': [.1, .5, .9]}

    fitted = dcv.GridSearchCV(estimator, param_grid).fit(X, y)
    assert hasattr(fitted, 'dask_graph_')

    with tmpdir() as directory:
        target = os.path.join(directory, 'mydask')
        fitted.visualize(filename=target)
        assert os.path.exists(target + '.png')

    # An unfitted search must raise
    unfitted = dcv.GridSearchCV(estimator, param_grid)
    with pytest.raises(NotFittedError):
        unfitted.visualize()
with_csv.py 文件源码 项目:stock_trend_prediction 作者: r12543 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def performSVMClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Fit a default SVC on the training data and score it on the test data.

    ``fout`` and ``savemodel`` are accepted but currently unused: the code
    that pickled the model to a timestamped file is disabled.

    Returns the value of ``SVC.score`` on the test set.
    """
    model = SVC()
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
apply_ml1.py 文件源码 项目:stock_trend_prediction 作者: r12543 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def performSVMClass(X_train, y_train, X_test, y_test):
    """Fit a default SVC and return its test accuracy as a percentage.

    :param X_train: training features
    :param y_train: training labels
    :param X_test: test features
    :param y_test: test labels, compared element-wise with the predictions
    :return: share of correctly predicted test samples, multiplied by 100
    """
    classifier = svm.SVC()
    classifier.fit(X_train, y_train)
    results = classifier.predict(X_test)

    num_correct = (results == y_test).sum()
    # Multiply by 1.0 first so the ratio is a true division: under Python 2
    # without `from __future__ import division`, an integer count divided by
    # len() would be truncated to 0 or 1.
    accuracy = num_correct * 1.0 / len(y_test)

    return accuracy * 100
sp_start.py 文件源码 项目:stock_trend_prediction 作者: r12543 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def performSVMClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Fit a default SVC, print its score on the test data and return it.

    ``fout`` and ``savemodel`` are accepted but currently unused: the code
    that pickled the model to a timestamped file is disabled.

    Returns the value of ``SVC.score`` on the test set.
    """
    clf = SVC()
    clf.fit(X_train, y_train)

    accuracy = clf.score(X_test, y_test)

    # Single pre-formatted argument: prints the same text as the former
    # Python 2 statement `print "SVM: ", accuracy` and is valid on Python 3.
    print("SVM:  %s" % (accuracy,))

    return accuracy
TMDetection.py 文件源码 项目:US-TransportationMode 作者: vlomonaco 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def support_vector_machine(self, sensors_set):
    """Train and evaluate an SVC on the features of one sensor set.

    C and gamma are looked up per sensor set in ``const``. The test accuracy
    is printed and written to a results file under ``const.DIR_RESULTS``,
    which is created if missing.
    """
    dataset = self.dataset
    features = list(dataset.get_sensors_set_features(sensors_set))
    print("SUPPORT VECTOR MACHINE.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))

    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        dataset.get_train, dataset.get_test, features)
    train_scaled, test_scaled = util.scale_features(train_features, test_features)

    model = SVC(C=const.PAR_SVM_C[sensors_set],
                gamma=const.PAR_SVM_GAMMA[sensors_set],
                verbose=False)
    model.fit(train_scaled, train_classes)
    acc = accuracy_score(test_classes, model.predict(test_scaled))
    print("ACCURACY : " + str(acc))
    print("END SUPPORT VECTOR MACHINE.....")

    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    results_path = const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_SUPPORT_VECTOR_MACHINE_RESULTS
    with open(results_path, 'w') as f:
        f.write("acc\n" + str(acc))

    # use different algorithms changing target classes, try all combination of two target classes
test_io_types.py 文件源码 项目:coremltools 作者: apple 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_support_vector_classifier(self):
    """Converted SVC predictions match scikit-learn for each numeric dtype.

    For every dtype the data is cast, a boolean target is derived by
    comparing against the target mean, and the first ten samples are checked
    one at a time. A RuntimeError is reported as an unsupported dtype.
    """
    for dtype in self.number_data_type.keys():
        scikit_model = SVC(kernel='rbf', gamma=1.2, C=1)
        data = self.scikit_data['data'].astype(dtype)
        typed_target = self.scikit_data['target'].astype(dtype)
        target = typed_target > typed_target.mean()
        scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
        coreml_model = create_model(spec)
        for idx in range(10):
            test_data = data[idx].reshape(1, -1)
            try:
                expected = scikit_model.predict(test_data)[0]
                actual = bool(int(coreml_model.predict({'data': test_data})['target']))
                self.assertEqual(
                    expected, actual,
                    msg="{} != {} for Dtype: {}".format(expected, actual, dtype))
            except RuntimeError:
                print("{} not supported. ".format(dtype))
train_classifier.py 文件源码 项目:data_programming 作者: kep1616 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def learn(training_data, training_labels, show_score=False, store=False):
    """Train a linear-kernel, probability-enabled SVC.

    :param training_data: training feature matrix
    :param training_labels: training labels
    :param show_score: when true, print the accuracy on the training data
    :param store: when true, pickle the fitted classifier to
        ``path_config.CLASSIFIER_PICKLING_FILE``
    """
    print ("Start Learning....")

    clf = SVC(kernel='linear', probability=True, C=1)

    clf.fit(training_data, training_labels)

    print ("Done Learning.")

    if store:
        print ("Pickling classifier...")
        # Context manager guarantees the file is flushed and closed even if
        # pickling fails.
        with open(path_config.CLASSIFIER_PICKLING_FILE, 'wb') as pickle_file:
            pickle.dump(clf, pickle_file)
        print ("Done Pickling.")

    if show_score:
        print ("Scoring classifier ...")
        print ("Data-Level Training Set Prediction Accuracy: %s" % clf.score(training_data, training_labels))
ClassificationLibCalculator.py 文件源码 项目:TextStageProcessor 作者: mhyhre 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def classification_linear_svm(self):
    """Classify the documents with a linear-kernel SVC and report results.

    Texts are vectorized with a HashingVectorizer; rows before ``self.split``
    are the training set, the rest the test set. Predictions and class
    probabilities are written to ``results.csv`` in the ``linear_svm_out/``
    output directory and emitted through the PrintInfo signal.
    """
    self.signals.PrintInfo.emit("???????? Linear SVM")

    output_dir = self.output_dir + 'linear_svm_out/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    vectors = HashingVectorizer().fit_transform(self.fdata)
    training_rows = vectors[:self.split]
    test_rows = vectors[self.split:]

    model = SVC(kernel="linear", probability=True, C=self.linear_svm_c)
    model.fit(training_rows, self.trainingClass)
    predicted = model.predict(test_rows)
    probabilities = model.predict_proba(test_rows)

    self.write_results_to_file(output_dir + 'results.csv', predicted, probabilities,
                               model.classes_, self.test_filenames)
    self.signals.PrintInfo.emit(
        self.compile_result_string(predicted, probabilities, model.classes_, self.test_filenames))


问题


面经


文章

微信
公众号

扫码关注公众号