python类SVC的实例源码

ClassificationLibCalculator.py 文件源码 项目:TextStageProcessor 作者: mhyhre 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def classification_rbf_svm(self):
    """Train an RBF-kernel SVM on hashed text features and report the results.

    Emits progress/result text through self.signals.PrintInfo and writes a
    CSV of predictions with per-class probabilities to rbf_svm_out/.
    """
    self.signals.PrintInfo.emit("RBF SVM")

    # Make sure the per-classifier output directory exists.
    output_dir = self.output_dir + 'rbf_svm_out/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Vectorize the whole corpus once so train/test share a feature space,
    # then split at self.split.
    matrix = HashingVectorizer().fit_transform(self.fdata)
    train_part, test_part = matrix[:self.split], matrix[self.split:]

    model = SVC(gamma=2, probability=True, C=self.rbf_svm_c)
    model.fit(train_part, self.trainingClass)
    results = model.predict(test_part)
    proba = model.predict_proba(test_part)

    # Persist and display predictions together with class probabilities.
    self.write_results_to_file(output_dir + 'results.csv', results, proba, model.classes_, self.test_filenames)
    out_text = self.compile_result_string(results, proba, model.classes_, self.test_filenames)
    self.signals.PrintInfo.emit(out_text)
behaviorView.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
    """Fit an SVM on the training data and predict a label for each test user.

    Returns a dict mapping each user in self.testDict to the classifier's
    prediction for that user's single BDS feature value.
    """
    self.classifier = SVC()
    self.classifier.fit(self.trainingSet, self.trainingLabel)

    # Each user is scored on a one-element feature vector [[BDS[user]]].
    return {user: self.classifier.predict([[self.BDS[user]]])
            for user in self.testDict}
unigram.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
    """Build bag-of-words vectors over train+test, then fit and evaluate
    LogisticRegression and SVC, printing a classification report for each.

    Fix: the original used Python-2-only `print` statements; the
    parenthesized single-argument form below prints identically under
    Python 2 and is valid Python 3.
    """
    # One shared dictionary over the concatenated corpus so that train and
    # test documents are encoded in the same vocabulary.
    corpus = self.trainingSet + self.testSet
    dictionary = corpora.Dictionary(corpus)
    corpus = [dictionary.doc2bow(text) for text in corpus]
    text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

    # Optional dimensionality reduction controlled by module-level flags.
    if PCA_Applied:
        pca = PCA(n_components=PCA_nComponents)
        text_matrix = pca.fit_transform(text_matrix)

    n_train = len(self.trainingSet)

    classifier = LogisticRegression()
    classifier.fit(text_matrix[0:n_train], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[n_train:])
    print('Logistic:')
    print(classification_report(self.testLabel, pred_labels))

    classifier = SVC()
    classifier.fit(text_matrix[0:n_train], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[n_train:])
    print('SVM:')
    print(classification_report(self.testLabel, pred_labels))
TF_IDF.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
    """Build TF-IDF vectors over train+test, then fit and evaluate
    LogisticRegression and SVC, printing a classification report for each.

    Fix: the original used Python-2-only `print` statements; the
    parenthesized single-argument form below prints identically under
    Python 2 and is valid Python 3.
    """
    # One shared dictionary over the concatenated corpus, TF-IDF weighted.
    corpus = self.trainingSet + self.testSet
    dictionary = corpora.Dictionary(corpus)
    corpus = [dictionary.doc2bow(text) for text in corpus]
    model = models.TfidfModel(corpus)
    corpus = [text for text in model[corpus]]
    text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

    # Optional dimensionality reduction controlled by module-level flags.
    if PCA_Applied:
        pca = PCA(n_components=PCA_nComponents)
        text_matrix = pca.fit_transform(text_matrix)

    n_train = len(self.trainingSet)

    classifier = LogisticRegression()
    classifier.fit(text_matrix[0:n_train], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[n_train:])
    print('Logistic:')
    print(classification_report(self.testLabel, pred_labels))

    classifier = SVC()
    classifier.fit(text_matrix[0:n_train], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[n_train:])
    print('SVM:')
    print(classification_report(self.testLabel, pred_labels))
DegreeSAD.py 文件源码 项目:OpinionSpam 作者: Coder-Yu 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def fitAndPredict(self):
    """Train an SVM on (MUD, RUD, QUD) features and return per-user predictions.

    Returns a dict mapping each user in self.testDict to the classifier's
    prediction for that user's three-feature vector.
    """
    model = SVC()
    model.fit(self.trainingSet, self.trainingLabel)

    # One [MUD, RUD, QUD] feature vector per test user.
    return {user: model.predict([[self.MUD[user], self.RUD[user], self.QUD[user]]])
            for user in self.testDict}
classifier.py 文件源码 项目:stock-price-prediction 作者: chinuy 项目源码 文件源码 阅读 57 收藏 0 点赞 0 评论 0
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data.

    Arguments
    dataset:    DataFrame whose last column is the 'UpDown' label and whose
                remaining columns are features.
    method:     one of 'RNN', 'RF', 'KNN', 'SVM', 'ADA'.
    parameters: extra hyper-parameters; for 'SVM' this is (C, gamma).

    Returns the fitted classifier.

    Fix: the original silently fell through on an unknown `method` and then
    crashed with NameError on `clf`; now raises ValueError instead.
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        # The RNN helper fits internally and returns the model directly.
        return performRNNlass(dataset[features], dataset['UpDown'])

    if method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()
    else:
        raise ValueError("unknown method: %r" % (method,))

    return clf.fit(dataset[features], dataset['UpDown'])
result.py 文件源码 项目:Graduation-design 作者: Baichenjia 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def Training_model():
    """Load the term-count and label files, TF-IDF transform the counts,
    and return an SVC fitted on them.

    Fix: the original opened both files without context managers, leaking
    the handles if loading raised; `with` guarantees they are closed.
    """
    # Term-count matrix; the first line is a header and is skipped.
    with open("f://emotion/mysite/weibo_emotion/emotion_file/data_count.txt") as f:
        f.readline()  # skip header line
        data = np.loadtxt(f)

    # Class labels, one per sample.
    with open("f://emotion/mysite/weibo_emotion/emotion_file/data_jixing.txt") as f1:
        leibie = np.loadtxt(f1)

    # TF-IDF weighting of the raw counts.
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(data)
    data1 = tfidf.toarray()

    # Train the SVM classifier on the weighted features.
    clf = svm.SVC()
    clf.fit(data1, leibie)
    return clf
svm_video.py 文件源码 项目:LogoDetectionInVideo 作者: nmemme 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def train():
    """Build a training set from logo images (one subdirectory per class)
    and return an SVC fitted on PCA-compressed grayscale features.

    Fixes vs. the original:
    - `svm = svm.SVC()` shadowed the imported `svm` module with a local,
      raising UnboundLocalError on first use;
    - fitting and returning happened inside the per-class loop, so only
      the first class directory was ever used for training;
    - the unused `counter` local was removed.
    """
    training_set = []
    training_labels = []
    os.chdir("/Users/muyunyan/Desktop/EC500FINAL/logo/")
    for class_dir in os.listdir("."):
        os.chdir(class_dir)
        print(class_dir)
        for fname in os.listdir("."):
            img = cv2.imread(fname)
            res = cv2.resize(img, (250, 250))
            gray_image = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
            xarr = np.squeeze(np.array(gray_image).astype(np.float32))
            # PCA over the pixel array; the flattened eigenvectors serve
            # as the image's feature vector.
            m, v = cv2.PCACompute(xarr)
            training_set.append(np.array(v).ravel())
            training_labels.append(class_dir)
        os.chdir("..")

    classifier = svm.SVC()
    classifier.fit(training_set, training_labels)
    return classifier
active_learning.py 文件源码 项目:Steal-ML 作者: ftramer 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def CAL_v(name, label_p, label_n, oracle, n_features, ftype, test_x, test_y):
    """Active-learning extraction loop: repeatedly grid-search an RBF SVM
    surrogate on the points collected so far, query new points near its
    boundary, label them with the oracle, and print test accuracy, until
    the query budget (3500) is exhausted.

    Fix: the final Python-2-only `print a, b, c` statement is replaced by a
    %-formatted print() that produces identical output and is valid Python 3.
    """
    online = OnlineBase(name, label_p, label_n, oracle, n_features, ftype, error=.5)
    x, y = online.collect_pts(100, -1)
    i = 0
    q = online.get_n_query()

    # Hyper-parameter grid for the surrogate RBF SVM.
    C_range = np.logspace(-2, 5, 10, base=10)
    gamma_range = np.logspace(-5, 1, 10, base=10)
    param_grid = dict(gamma=gamma_range, C=C_range)

    while q < 3500:
        i += 1
        # Re-fit the surrogate on everything collected so far.
        cv = StratifiedShuffleSplit(y, n_iter=5, test_size=0.2, random_state=42)
        grid = GridSearchCV(svm.SVC(), param_grid=param_grid, cv=cv, verbose=0, n_jobs=-1)
        grid.fit(x, y)
        h_ = grid.best_estimator_

        # Query points near the surrogate's boundary and label them with
        # the oracle, growing the training set.
        online_ = OnlineBase('', label_p, label_n, h_.predict, n_features, ftype, error=.1)
        x_, _ = online_.collect_pts(10, 200)
        if x_ is not None and len(x_) > 0:
            x.extend(x_)
            y.extend(oracle(x_))
        q += online_.get_n_query()

        pred_y = h_.predict(test_x)
        # Space-separated, matching the original Python 2 output exactly.
        print("%s %s %s" % (len(x), q, sm.accuracy_score(test_y, pred_y)))
RBFTrainer.py 文件源码 项目:Steal-ML 作者: ftramer 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def grid_retrain_in_x(self):
    """Grid-search gamma for a high-C RBF SVC on the extracted points and
    benchmark the best estimator; fall back to a plain SVC when the
    stratified split cannot be built."""
    param_grid = dict(gamma=np.logspace(-15, 3, 19, base=2))

    # With fewer than two classes there is nothing to train.
    if len(np.unique(self.y_ex)) < 2:
        return 1, 1

    try:
        cv = StratifiedShuffleSplit(self.y_ex, n_iter=5, test_size=.2)
        grid = GridSearchCV(SVC(C=1e5), param_grid=param_grid, cv=cv, n_jobs=-1)
        grid.fit(self.X_ex, self.y_ex)
        rbf_svc2 = grid.best_estimator_
    except ValueError:
        # Stratified split failed (e.g. a class with a single sample):
        # train a fixed high-C SVC directly instead.
        rbf_svc2 = SVC(C=1e5)
        rbf_svc2.fit(self.X_ex, self.y_ex)

    self.set_clf2(rbf_svc2)
    return self.benchmark()
PolyTrainer.py 文件源码 项目:Steal-ML 作者: ftramer 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def grid_search(self):
    """Grid-search C for a polynomial-kernel SVC on the extracted points,
    then return a Result with train/validation/test accuracy of the best
    estimator.

    Fix: the original collected `grid.grid_scores_` into an unused local;
    that attribute has been removed from scikit-learn (replaced by
    `cv_results_`), so the dead line is dropped.
    """
    C_range = np.logspace(-5, 15, 21, base=2)
    param_grid = dict(C=C_range)
    cv = StratifiedShuffleSplit(self.y_ex, n_iter=5, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(kernel='poly', max_iter=10000), param_grid=param_grid, cv=cv, n_jobs=1, verbose=0)

    logger.info('start grid search for Linear')
    grid.fit(self.X_ex, self.y_ex)
    logger.info('end grid search for Linear')

    # Final train: evaluate the best estimator on all three splits.
    clf = grid.best_estimator_
    pred_train = clf.predict(self.X_ex)
    pred_val = clf.predict(self.val_x)
    pred_test = clf.predict(self.test_x)

    r = Result(self.name + ' (X)', 'Poly', len(self.X_ex),
               sm.accuracy_score(self.y_ex, pred_train),
               sm.accuracy_score(self.val_y, pred_val),
               sm.accuracy_score(self.test_y, pred_test))
    return r
parkinson.py 文件源码 项目:Parkinsons-Vocal-Analysis-Model 作者: WilliamY97 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def fit_model(X, y):
    """Grid-search kernel, degree and C for an SVC using the custom
    performance_metric scorer; return the fitted GridSearchCV object."""
    param_grid = {
        'kernel': ['poly', 'rbf', 'sigmoid'],
        'degree': [1, 2, 3],
        'C': [0.1, 1, 10],
    }

    # Higher performance_metric is better, so greater_is_better=True.
    scorer = make_scorer(performance_metric, greater_is_better=True)

    search = GridSearchCV(svm.SVC(),
                          param_grid=param_grid,
                          scoring=scorer)
    search.fit(X, y)
    return search


# Read student data
data_analysis.py 文件源码 项目:algo-trading-pipeline 作者: NeuralKnot 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def create_model(self, training_articles):
    """Train a one-vs-rest probabilistic SVM over per-article features and
    persist both the fitted scaler and the model with joblib."""
    model = OneVsRestClassifier(svm.SVC(probability=True))

    features = []
    labels = []
    for i, article in enumerate(training_articles):
        print("Generating features for article " + str(i) + "...")
        google_cloud_response = self.analyze_text_google_cloud(article["article"])
        relevant_entities = self.get_relevant_entities(
            google_cloud_response["entities"],
            article["market"]["entities"],
            article["market"]["wikipedia_urls"])

        # Only count this article if a relevant entity is present.
        if relevant_entities:
            article_features = self.article_features(
                relevant_entities, article["market"],
                google_cloud_response, article["article"])
            features.append(article_features)
            labels.append(article["label"])
        else:
            print("Skipping article " + str(i) + "...")

    print("Performing feature scaling...")
    scaler = preprocessing.StandardScaler().fit(features)
    features_scaled = scaler.transform(features)

    print("Fitting model...")
    model.fit(features_scaled, labels)

    print("Saving model...")
    # NOTE(review): "caler.pkl" looks like a typo for "scaler.pkl", but it is
    # kept byte-identical because the loading side may expect this exact
    # path — confirm before renaming.
    joblib.dump(scaler, "data_analysis/caler.pkl")
    joblib.dump(model, "data_analysis/model.pkl")

    print("Done!")

    # For use in prod
identify_singing_voice_gender.py 文件源码 项目:ISM2017 作者: ybayle 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")

    # Features/groundtruths either arrive pre-split in `data` or are read
    # from the train/test file paths.
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)

    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)

    # Only one classifier is currently enabled; add more entries here to
    # compare several. The predictions of the last classifier iterated are
    # what gets returned.
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
    }

    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
util.py 文件源码 项目:code-uai16 作者: thanhan 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def classify(n=50):
    """Fit a log-loss SGD classifier (L1 penalty, heavily re-weighted
    classes) on the first n rows of the module-level mat/rel arrays and
    return it."""
    clf = SGDClassifier(loss="log", penalty="l1",
                        class_weight={0.0: 0.022, 1.0: 1.0})
    clf.fit(mat[:n], rel[:n])
    return clf
solution.py 文件源码 项目:Kaggle 作者: lawlite19 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def baseline_svm():
    train_data = pd.read_csv(r"data/train.csv")
    print u"?????\n",train_data.info()
    print u'?????\n',train_data.describe()  
    #display_data(train_data)  # ????????
    #display_with_process(train_data) # ??????????????????,????
    process_data = pre_processData(train_data,'process_train_data')  # ????????????
    train_data = process_data.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # ???????????
    train_np = train_data.as_matrix()  # ????
    '''??model'''
    X = train_np[:,1:]
    y = train_np[:,0]
    model = svm.SVC(C=1.0,tol=1e-6).fit(X,y)
    # print pd.DataFrame({"columns":list(train_data.columns)[1:],"coef_":list(model.coef_.T)})

    '''??????'''
    test_data = pd.read_csv(r"data/test.csv")
    process_test_data = pre_processData(test_data,'process_test_data')  # ?????
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId':process_test_data['PassengerId'].as_matrix(),'Survived':predict.astype(np.int32)})
    result.to_csv(r'baseline_svm_result/prediction.csv',index=False)    



# baseline???????——0.76077
solution.py 文件源码 项目:Kaggle 作者: lawlite19 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def baseline_svm_crossValidate():
    origin_train_data = pd.read_csv(r"data/train.csv")
    process_data = pre_processData(origin_train_data,'process_train_data')  # ????????????
    process_data_train,process_data_cv = train_test_split(process_data,test_size=0.2)   
    train_data = process_data_train.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # ???????????
    train_np = train_data.as_matrix()  # ????
    '''??model'''
    X_train = train_np[:,1:]
    y_train = train_np[:,0]
    model = svm.SVC(kernel='rbf',tol=1e-6).fit(X_train,y_train)
    #print pd.DataFrame({"columns":list(train_data.columns)[1:],"coef_":list(model.coef_.T)})
    cv_data = process_data_cv.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    cv_np = cv_data.as_matrix()
    X_cv = cv_np[:,1:]
    y_cv = cv_np[:,0]
    predictions = model.predict(X_cv)
    print np.float32(np.sum(predictions == y_cv))/np.float32(predictions.shape[0])

    error_items = origin_train_data.loc[origin_train_data['PassengerId'].isin(process_data_cv[predictions != y_cv]['PassengerId'].values)]
    predictions_item = pd.DataFrame(data=process_data_cv[predictions != y_cv]['PassengerId'])
    predictions_item.columns=['error_PassengerId']
    # error_items = error_items.reset_index(drop=True)
    error_result = pd.concat([error_items,predictions_item],axis=1)
    error_result.to_csv(r'error.csv',index=False)


    '''??????'''
    '''test_data = pd.read_csv(r"data/test.csv")
    process_test_data = pre_processData(test_data,'process_test_data',optimize=False)  # ?????
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId':process_test_data['PassengerId'].as_matrix(),'Survived':predict.astype(np.int32)})
    result.to_csv(r'svm_result/prediction.csv',index=False)'''



# baseline crossValidate???????——??????
model.py 文件源码 项目:deeppavlov 作者: deepmipt 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def svc_model(self):
    """Return an (unfitted) linear-kernel SVC with probability estimates
    enabled and C=0.3."""
    return svm.SVC(probability=True, C=0.3, kernel='linear')
load_feature.py 文件源码 项目:EmotiW-2017-Audio-video-Emotion-Recognition 作者: xujinchang 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def use_SVM(X_data, y_data):
    """Fit an RBF SVC (gamma=0.1, C=10, probability estimates on) on the
    given data, persist it with joblib, and return the fitted model."""
    p_gamma, p_C = 0.1, 10
    model = SVC(kernel='rbf', random_state=0, gamma=p_gamma, C=p_C, probability=True)
    model.fit(X_data, y_data)
    # The dump filename is tagged with the hyper-parameters used.
    joblib.dump(model, "./sklearn_model/svm_trainval1_{param1}_{param2}".format(param1=p_gamma, param2=p_C))
    return model
bci_workshop_tools.py 文件源码 项目:Wall-EEG 作者: neurotechuoft 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def classifier_train(feature_matrix_0, feature_matrix_1, algorithm = 'SVM'):
    """
    Trains a binary classifier using the SVM algorithm with the following parameters

    Arguments
    feature_matrix_0: Matrix with examples for Class 0
    feature_matrix_1: Matrix with examples for Class 1
    algorithm: Currently only SVM is supported

    Outputs
    classifier: trained classifier (scikit object)
    mu_ft, std_ft: normalization parameters for the data
    """
    # Create vector y (class labels): 0 for the first matrix, 1 for the second.
    class0 = np.zeros((feature_matrix_0.shape[0],1))
    class1 = np.ones((feature_matrix_1.shape[0],1))

    # Concatenate feature matrices and their respective labels.
    y = np.concatenate((class0, class1),axis=0)
    features_all = np.concatenate((feature_matrix_0, feature_matrix_1),axis=0)

    # Normalize inputs with the global mean/std of the combined data.
    mu_ft = np.mean(features_all)
    std_ft = np.std(features_all)
    X = (features_all - mu_ft) / std_ft

    # Train SVM using default parameters. ravel() passes y as the 1-D
    # vector scikit-learn expects (the original passed an (n, 1) column,
    # which triggers a DataConversionWarning and an implicit ravel anyway).
    classifier = svm.SVC()
    classifier.fit(X, y.ravel())

    return classifier, mu_ft, std_ft


问题


面经


文章

微信
公众号

扫码关注公众号