def get_feature_importance(self, clf, model_name):
    """Return the fitted classifier's learned feature weights.

    :param clf: a fitted scikit-learn estimator
    :param model_name: key identifying the estimator type (see table below)
    :return: list of importances (tree models), nested list of coefficients
             (linear models), or None for models that expose neither.
    :raises KeyError: if model_name is not a supported model (unchanged
             behavior from the original).
    """
    clfs = {'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef'}
    # Look the attribute kind up once instead of twice per call.
    attr = clfs[model_name]
    if attr == 'feature_importances':
        return list(clf.feature_importances_)
    elif attr == 'coef':
        # tolist() already returns a plain list; the original wrapped it in
        # a redundant list() call.
        return clf.coef_.tolist()
    else:
        return None
Python GaussianNB() usage examples
def get_classifier_class(class_name):
    """Map a short algorithm name to its scikit-learn classifier class.

    :param class_name: one of the keys in the table below (e.g. 'svm')
    :return: the classifier class (not an instance)
    :raises ValueError: if class_name is not a supported algorithm.

    Fix: the error message now reports the offending name so callers can
    see what they passed instead of a bare 'No such classifier'.
    """
    name_table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
    }
    if class_name not in name_table:
        raise ValueError('No such classifier: {0!r}'.format(class_name))
    return name_table[class_name]
classify.py source file
Project: oss-github-analysis-project
Author: itu-oss-project-team
Project source code
File source code
Views: 23
Favorites: 0
Likes: 0
Comments: 0
def __create_classifiers(self):
    """Build the list of candidate classifiers to compare.

    :return: list of {"func": estimator, "name": short_name} dicts,
             in the order they should be evaluated.
    """
    knn = neighbors.KNeighborsClassifier
    specs = [
        (linear_model.SGDClassifier(loss="log"), "sgd"),
        (knn(1, weights='distance'), "knn1"),
        (knn(3, weights='distance'), "knn3"),
        (knn(5, weights='distance'), "knn5"),
        (GaussianNB(), "naive_bayes"),
    ]
    # Disabled alternatives kept from the original for reference:
    # tree.DecisionTreeClassifier() ("decision_tree"),
    # MLPClassifier(max_iter=10000) ("mlp"),
    # RandomForestClassifier() ("random_forest")
    return [{"func": estimator, "name": name} for estimator, name in specs]
def define_model(self, model, parameters, n_cores=0):
    """Instantiate one of the supported classifiers and apply `parameters`.

    :param model: key naming the classifier (e.g. 'RandomForestClassifier')
    :param parameters: dict of hyper-parameters forwarded to set_params
    :param n_cores: worker count for parallel-capable models; 0 (the
        default) keeps the original hard-coded value of 7, so existing
        callers see identical behavior.
    :return: the configured (unfitted) estimator
    :raises ConfigError: if `model` is not a supported key.

    Fix: `n_cores` was accepted but silently ignored; it is now honored
    when a positive value is passed.
    """
    n_jobs = n_cores if n_cores > 0 else 7
    clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=n_jobs),
            'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=n_jobs, criterion='entropy'),
            'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
            'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
            'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
            'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
            'GaussianNB': GaussianNB(),
            'DecisionTreeClassifier': DecisionTreeClassifier(),
            'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=n_jobs),
            'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
            'linear.SVC': svm.LinearSVC()}
    if model not in clfs:
        raise ConfigError("Unsupported model {}".format(model))
    # Caller-supplied hyper-parameters override the defaults above.
    clf = clfs[model]
    clf.set_params(**parameters)
    return clf
sentiment.py source file
Project: Twitter-and-IMDB-Sentimental-Analytics
Author: abhinandanramesh
Project source code
File source code
Views: 28
Favorites: 0
Likes: 0
Comments: 0
def build_models_DOC(train_pos_vec, train_neg_vec):
    """
    Returns a GaussianNB and LogisticRegression model that are fit to the
    training data (positive vectors first, then negative vectors).
    """
    # Labels mirror the concatenation order of the feature vectors.
    labels = ["pos"] * len(train_pos_vec) + ["neg"] * len(train_neg_vec)
    features = list(train_pos_vec) + list(train_neg_vec)
    # For LogisticRegression, pass no parameters (per the assignment spec).
    nb_model = GaussianNB().fit(features, labels)
    lr_model = LogisticRegression().fit(features, labels)
    return nb_model, lr_model
def learns(tests, trains, indep=lambda x: x[:-1],
           dep=lambda x: x[-1],
           rf=None, lg=None, dt=None, nb=None):
    """Train four classifiers (random forest, logistic regression,
    Gaussian NB, decision tree) and feed each test prediction into the
    corresponding Abcd scorer.

    :param tests, trains: datasets passed through to trainTest
    :param indep, dep: row -> features / row -> label extractors
    :param rf, lg, dt, nb: optional pre-built Abcd scorers; a fresh one is
        created per call when omitted.

    Bug fix: the scorers previously defaulted to `Abcd()` instances
    evaluated once at definition time (mutable default arguments), so
    repeated calls silently accumulated counts into the same objects.
    """
    rf = Abcd() if rf is None else rf
    lg = Abcd() if lg is None else lg
    dt = Abcd() if dt is None else dt
    nb = Abcd() if nb is None else nb
    x1, y1, x2, y2 = trainTest(tests, trains, indep, dep)
    forest = RandomForestClassifier(n_estimators=50)
    forest = forest.fit(x1, y1)
    for n, got in enumerate(forest.predict(x2)):
        rf(predicted=got, actual=y2[n])
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(x1, y1)
    for n, got in enumerate(logreg.predict(x2)):
        lg(predicted=got, actual=y2[n])
    bayes = GaussianNB()
    bayes.fit(x1, y1)
    for n, got in enumerate(bayes.predict(x2)):
        nb(predicted=got, actual=y2[n])
    dectree = DecisionTreeClassifier(criterion="entropy",
                                     random_state=1)
    dectree.fit(x1, y1)
    for n, got in enumerate(dectree.predict(x2)):
        dt(predicted=got, actual=y2[n])
def main():
    """Script entry point: train a hand-rolled naive Bayes model
    (per-class summaries) and compare its predictions against
    scikit-learn's GaussianNB on the same train/test split."""
    args = get_args()
    # load and split data
    dataset, target = load_dataset(args.file)
    train_x, train_y, test_x, actual = split_dataset(
        dataset, target, args.split)
    print("Training set size: %d, Testing set size: %d" %
          (len(train_x), len(test_x)))
    # prepare model -- presumably per-class mean/stdev summaries; confirm
    # against summarize_by_class, which is defined elsewhere.
    summaries = summarize_by_class(train_x, train_y)
    # test model
    predictions = get_predictions(summaries, test_x)
    display(actual, predictions)
    # using scikit: same data, library implementation, for comparison
    gnb = GaussianNB()
    y_pred = gnb.fit(train_x, train_y).predict(test_x)
    display(actual, y_pred)
def classification_gaussian_nb(self):
    """Classify hashed text features with Gaussian NB and write the
    predictions and probabilities to <output_dir>/gaussian_nb_out/."""
    self.signals.PrintInfo.emit("Gaussian NB")
    output_dir = self.output_dir + 'gaussian_nb_out/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Hash the raw documents into a sparse feature matrix.
    vectorizer = HashingVectorizer()
    fdata = vectorizer.fit_transform(self.fdata)
    # self.split is the index separating training rows from test rows.
    trainingSet = fdata[:self.split]
    testSet = fdata[self.split:]
    classificator = GaussianNB()
    # GaussianNB requires dense input, hence toarray() on the sparse slices.
    classificator.fit(trainingSet.toarray(), self.trainingClass)
    results = classificator.predict(testSet.toarray())
    proba = classificator.predict_proba(testSet.toarray())
    self.write_results_to_file(output_dir + 'results.csv', results, proba, classificator.classes_, self.test_filenames)
    out_text = self.compile_result_string(results, proba, classificator.classes_, self.test_filenames)
    self.signals.PrintInfo.emit(out_text)
def train():
with open('./bin/train.bin', 'rb') as f:
ds = pickle.load(f)
XTrain, yTrain = ds['X'], ds['y']
del ds
with open('./bin/validation.bin', 'rb') as f:
ds = pickle.load(f)
XValidation, yValidation = ds['X'], ds['y']
del ds
clf = GaussianNB()
clf.fit(XTrain, yTrain)
print "Training Set Length:", XTrain.shape
print "Test Set Length:", XValidation.shape
print "Test Scores:", clf.score(XValidation, yValidation)
with open('./bin/gnbClf.bin', 'wb') as f:
pickle.dump(clf, f)
print "[SUCCESS] Saved classifier as `gnbClf.bin`"
def Fit(self, bags, bagData):
    """Train 10 (BernoulliNB, GaussianNB) pairs, one per held-out fold.

    bagData appears to be 10 folds of (instances, labels) pairs; fold i's
    models are trained on the other nine folds -- TODO confirm against the
    caller. NOTE(review): the `bags` parameter is unused here.
    Results are stored in self.Bayes / self.GBayes (index = held-out fold).
    """
    self.Bayes, self.GBayes = [], []
    for i in xrange(10):  # i = fold held out of training
        bnb = BernoulliNB()
        gnb = GaussianNB()
        # x: Bernoulli-encoded features, xg: Gaussian-encoded features,
        # y: labels shared by both models.
        x, y, xg = [], [], []
        for j in xrange(10):
            if i != j:
                for vv in xrange(len(bagData[j][0])):
                    x.append(self.Convert(bagData[j][0][vv]))
                    xg.append(self.ConvertGauss(bagData[j][0][vv]))
                y.extend(bagData[j][1])
        bnb.fit(x, y)
        gnb.fit(xg, y)
        self.Bayes.append(bnb)
        self.GBayes.append(gnb)
def test_GaussianNB(*data):
    '''
    Fit scikit-learn's Gaussian naive Bayes and print train/test accuracy.
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    model = naive_bayes.GaussianNB()
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print('Training Score: {0}'.format(train_score))
    print('Testing Score: {0}'.format(test_score))
def GaussianNBPredictModel(localTrainLabel, config):
train = pd.read_csv('../feature/trainQlist.csv', header = 0, sep = ",")
test = pd.read_csv('../feature/testQlist.csv', header = 0, sep = ",")
print "Train tf-idf vector Model..."
encode = TfidfVectorizer(decode_error = 'ignore', norm = "l2", binary = False, sublinear_tf = True, min_df = 50)
localTrainFeature = encode.fit_transform(train['qlist'].values)
localTestFeature = encode.transform(train['qlist'].values)
print localTrainFeature.shape, localTestFeature.shape
print 'train...'
model = GaussianNB()
model.fit(X = localTrainFeature.toarray(), y = localTrainLabel)
print 'predict...'
if config['prob'] == False:
return model.predict(localTestFeature.toarray()), test['uid'].values
else:
return model.predict_log_proba(localTestFeature.toarray()), test['uid'].values
#-- Multinomial Navie Bayes corss validation model frame
def test_discretenb_pickle():
    # Test picklability of discrete naive Bayes classifiers
    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        clf = cls().fit(X2, y2)
        y_pred = clf.predict(X2)
        # Round-trip the fitted estimator through pickle and verify the
        # restored model predicts identically.
        store = BytesIO()
        pickle.dump(clf, store)
        clf = pickle.load(BytesIO(store.getvalue()))
        assert_array_equal(y_pred, clf.predict(X2))
        if cls is not GaussianNB:
            # TODO re-enable me when partial_fit is implemented for GaussianNB
            # Test pickling of estimator trained with partial_fit
            # (two incremental batches must match the full-fit predictions).
            clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2))
            clf2.partial_fit(X2[3:], y2[3:])
            store = BytesIO()
            pickle.dump(clf2, store)
            clf2 = pickle.load(BytesIO(store.getvalue()))
            assert_array_equal(y_pred, clf2.predict(X2))
def train_classifier_listing(self):
    """Train the listing-price classifier from JSON files stored in B2,
    upload the serialized model back to B2, and return its accuracy on
    the training set itself (resubstitution score)."""
    self.clfListing = GaussianNB()
    files = self.b2s.ls('data/training')
    X = np.zeros((len(files), self.numFeat))
    Y = np.zeros(len(files))
    for i, file in enumerate(files):
        f = file['fileName']
        # read json into feature vector
        if not f.endswith('.json'):
            # NOTE(review): skipped files leave an all-zero row in X/Y that
            # still participates in training -- confirm this is intended.
            continue
        textJson = self.b2s.download(f)
        listing = json.loads(textJson)
        X[i] = self.bundle_json_obj(listing)
        # Price bucketed in $50 steps; max(..., 10) FLOORS every label at 10,
        # so all prices under $500 collapse to the same class.
        # NOTE(review): looks like min() may have been intended -- confirm.
        Y[i] = max(int(listing['price'] / 50), 10)
    self.clfListing.fit(X, Y)
    # Serialize via joblib to a temp file, then upload the bytes to B2.
    temp = tempfile.NamedTemporaryFile()
    joblib.dump(self.clfListing, temp.name)
    self.b2s.upload('classifiers/nb_listing.pkl',
                    temp.read(), 'application/octet-stream')
    return self.clfListing.score(X, Y)
# train a classifier on description
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one

    Features/groundtruths come either from `data` (a dict with
    train/test features and groundtruths) or from the `train`/`test`
    file paths. Only RandomForest is currently enabled; the other
    candidates are kept commented out below.
    NOTE(review): `disp` and `outfilename` are unused in this version,
    and `res_dir` is prepared but never written to here.
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        # Pre-split data supplied directly by the caller.
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        # Otherwise read features/groundtruths from the given file paths.
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    # Returns the predictions of the last classifier in the dict
    # (currently the only one).
    return predictions
def getEstimator(es):
    """Return a scikit-learn estimator selected by es.ml_algorithm
    (case-insensitive).

    :param es: settings object with ml_algorithm, random_seed and (for
        SVM) svmKernel attributes
    :return: the configured estimator, or None (after printing a
        warning) for unrecognized algorithm names -- unchanged behavior.

    Fix: the warning message omitted the supported 'Random' option.
    """
    estimator = None
    algo = es.ml_algorithm.upper()
    if algo == 'NAIVEBAYESGAUSSIAN':
        estimator = naive_bayes.GaussianNB()
    elif algo == 'SVM':
        estimator = svm.SVC(kernel=es.svmKernel, degree=3, C=0.1, random_state=es.random_seed)
    elif algo == 'RF':
        estimator = RandomForestClassifier(n_estimators=100, random_state=es.random_seed)
    elif algo == 'DECISIONTREE':
        estimator = DecisionTreeClassifier(random_state=es.random_seed)
    elif algo == 'RANDOM':
        # Baseline: random guessing via a dummy classifier.
        estimator = DummyClassifier(random_state=es.random_seed)
    else:
        print("Please enter correct estimator (NaiveBayesGaussian/SVM/RF/DecisionTree/Random)")
        # TODO: add regression?
    return estimator
def test_smoke():
    """Smoke test: the lazy (dask-style) GaussianNB `a` must agree with
    the eager reference implementation `b` on all fitted statistics and
    all prediction outputs. `X`/`y` are lazy collections; `X_`/`y2`-style
    materialized counterparts come from module scope."""
    a = nb.GaussianNB()
    b = nb_.GaussianNB()
    a.fit(X, y)
    # The reference model trains on materialized (computed) arrays.
    b.fit(X.compute(), y.compute())
    # Fitted statistics must match element-wise.
    assert_eq(a.class_prior_.compute(), b.class_prior_)
    assert_eq(a.class_count_.compute(), b.class_count_)
    assert_eq(a.theta_.compute(), b.theta_)
    assert_eq(a.sigma_.compute(), b.sigma_)
    # All three prediction surfaces must match as well.
    assert_eq(a.predict_proba(X).compute(), b.predict_proba(X_))
    assert_eq(a.predict(X).compute(), b.predict(X_))
    assert_eq(a.predict_log_proba(X).compute(), b.predict_log_proba(X_))
def build_naive_bayes_model(x_train, y_train):
    """Fit and return a Gaussian naive Bayes classifier.

    y_train is ravel()ed because scikit-learn expects a 1-D label array.
    """
    model = GaussianNB()
    model.fit(x_train, y_train.ravel())
    return model
def createPipeline(self):
    """Assemble the standardize-then-GaussianNB pipeline and store it on
    self.pipeline."""
    steps = [
        ('scaler', StandardScaler()),
        ('model', naive_bayes.GaussianNB()),
    ]
    self.pipeline = Pipeline(steps)
test.py source file
Project: Audio-classification-using-Bag-of-Frames-approach
Author: amogh3892
Project source code
File source code
Views: 19
Favorites: 0
Likes: 0
Comments: 0
def naive_bayes_predict(training_samples, training_labels, test_samples, test_lables):
    """Train GaussianNB, time fit/predict, and write a metrics report plus
    the predicted labels under Temp\\.

    (The parameter name `test_lables` [sic] is kept for interface
    compatibility with existing callers.)

    Fix: the report previously wrote the misspelled header "Alogirthm";
    corrected to "Algorithm".
    """
    from sklearn.naive_bayes import GaussianNB
    clf = GaussianNB()
    t0 = time()
    clf.fit(training_samples, training_labels)
    training_time = round(time() - t0, 3)
    t0 = time()
    pred = clf.predict(test_samples)
    test_time = round(time() - t0, 3)
    from sklearn.metrics import accuracy_score
    acc = accuracy_score(pred, test_lables)
    no_features = np.array(training_samples).shape[1]
    # The sample containers are rebound to their row counts for reporting.
    training_samples = np.array(training_samples).shape[0]
    test_samples = np.array(test_samples).shape[0]
    with open("Temp\\results.txt", "w") as outfile:
        outfile.write("Algorithm : {}\n".format("Naive Bayes"))
        outfile.write("No of features : {}\n".format(no_features))
        outfile.write("No of training samples : {}\n".format(training_samples))
        outfile.write("No of test samples : {}\n".format(test_samples))
        outfile.write("Training time : {}\n".format(training_time))
        outfile.write("Test time : {}\n".format(test_time))
        outfile.write("Accuracy : {}\n".format(acc))
    with open("Temp\\result_labels.csv", "wb") as outfile:
        np.savetxt(outfile, pred)
imdb_success_predictor.py source file
Project: Movie-Success-Predictor
Author: Blueteak
Project source code
File source code
Views: 27
Favorites: 0
Likes: 0
Comments: 0
def main():
    """Compare SGD (log loss), GaussianNB and a small RandomForest on
    movie-success prediction, using pre-release features first and then
    post-release features (Python 2 module)."""
    # before_release
    movie_info_before_release = load_movie_info_before_release()
    print '***Before release***'
    X = create_input(movie_info_before_release)
    Y = create_output_before_release(movie_info_before_release)
    clf = linear_model.SGDClassifier(loss='log')
    test_classifier(clf, X, Y, 'before_release')
    clf = GaussianNB()
    test_classifier(clf, X, Y, 'before_release')
    clf = RandomForestClassifier(n_estimators=10, max_depth=10)
    test_classifier(clf, X, Y, 'before_release')
    # After release: same three models, richer post-release features.
    movie_info = load_movie_info()
    print '***After release***'
    X = create_input(movie_info)
    Y = create_output(movie_info)
    clf = linear_model.SGDClassifier(loss='log')
    test_classifier(clf, X, Y, 'after_release')
    clf = GaussianNB()
    test_classifier(clf, X, Y, 'after_release')
    clf = RandomForestClassifier(n_estimators=10, max_depth=10)
    test_classifier(clf, X, Y, 'after_release')
model.py source file
Project: 5th_place_solution_facebook_check_ins
Author: aikinogard
Project source code
File source code
Views: 22
Favorites: 0
Likes: 0
Comments: 0
def nb_xyat_weight1(df_cell_train_feats, y_train, df_cell_test_feats):
    """Gaussian NB over (x, y, hour, weekday, log10 accuracy), with each
    training sample weighted by time squared. Returns the test-set class
    probabilities."""
    def prepare_feats(df):
        # Select the model's feature columns in a fixed order; accuracy is
        # log-scaled.
        feats = pd.DataFrame()
        feats["x"] = df["x"]
        feats["y"] = df["y"]
        feats["hour"] = df["hour"]
        feats["weekday"] = df["weekday"]
        feats["accuracy"] = df["accuracy"].apply(np.log10)
        return feats
    logging.info("train nb_xyat_weight1 model")
    model = GaussianNB()
    sample_weight = df_cell_train_feats["time"] ** 2
    model.fit(prepare_feats(df_cell_train_feats), y_train, sample_weight)
    return model.predict_proba(prepare_feats(df_cell_test_feats))
def run_cat(filename, modelname, fileout, embeddings, new_run=True, run_parse=True,
            model_type='logreg', C=10.0,
            alpha=1.0, cutoff=0.50, n_iter=1):
    """Load transactions from `filename`, parse/classify them with a new or
    saved model, write the categorized rows to `fileout`, and persist the
    (possibly updated) model back to `modelname`.

    :raises NameError: for an unrecognized model_type on a new run.

    Fix: the saved-model file handle was opened for loading and never
    closed; both the load and save paths now use context managers.
    """
    # pull relevant data and run parsing and classification
    df = pd.read_csv(filename)
    if (len(df.columns) == 2):  # make sure columns have the right names
        df.columns = ['raw', 'amount']
    if new_run:  # initialize the model;
        if model_type == 'logreg':
            model = linear_model.SGDClassifier(loss='log', warm_start=True,
                                               n_iter=n_iter, alpha=alpha)
        elif model_type == 'passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C, warm_start=True)
        elif model_type == 'naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else:  # load a saved, pre-trained model
        # NOTE: pickle.load is only safe on trusted model files.
        with open(modelname, 'rb') as modelFileLoad:
            model = pickle.load(modelFileLoad)
    fileCities = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(fileCities)
    df = cat_df(df, model, us_cities, embeddings, new_run, run_parse, cutoff=cutoff,
                model_type=model_type)
    df.to_csv(fileout, index=False)
    # Saving logistic regression model from training set 1
    with open(modelname, 'wb') as modelFileSave:
        pickle.dump(model, modelFileSave)
# ------ testing functions
def solve():
    """Train GaussianNB on the adult dataset via partial_fit and print the
    concatenated predictions for the test file.

    (The original inline comments were mojibake; rewritten in English.
    The runtime print string below is left byte-identical.)
    """
    # Load the training and test data (comma-separated, '#' = comment).
    training_arr = numpy.loadtxt('adult.txt', dtype=bytes, comments='#', delimiter=',')
    test_data = numpy.loadtxt('adult_test.txt', dtype=bytes, comments='#', delimiter=',')
    # Split rows into a feature list x and a label list y.
    # x_list = numpy.ndarray(len(training_arr))
    # y_list = numpy.ndarray(len(training_arr))
    # Column 12 holds the integer class label.
    y_list = [int(element[12]) for element in training_arr]
    # The first 12 columns are features; transform() converts each raw value.
    x_list = [[transform(x) for x in element[0:12]] for element in training_arr]
    # Apply the same per-value transformation to the test rows.
    test_data = [[transform(x) for x in element] for element in test_data]
    assert isinstance(y_list[0], int)  # labels are expected to be 0/1 ints
    assert len(x_list[0]) == 12  # each sample must carry 12 features
    assert len(test_data[0]) == 12  # test rows must match the feature count
    clf = GaussianNB()
    clf.partial_fit(x_list, y_list, numpy.unique(y_list))  # clf.fit(x_list, y_list) would work equally here
    res_arr = clf.predict(test_data)
    partial_fit_result = "".join([str(x) for x in res_arr])
    print("[*] ??????: {}".format(partial_fit_result))
def train(self, pd):
model = naive_bayes.GaussianNB()
model.fit(pd.data, pd.target)
print model
return model
def nb_experiment(scope_name, X, y):
    """Average GaussianNB accuracy over 50 random train/test splits read
    from pickled label files.

    NOTE(review): the source's indentation was lost; `return` is placed at
    function level here, which means only the results of the LAST `lp`
    iteration are averaged -- confirm the intended placement against the
    original project. `lp_cand` and `pk` come from module scope.
    """
    for lp in lp_cand:
        results = []
        for r in range(50):
            # Pickled dicts mapping sample index -> label for this split.
            with open('data/local/split/' + scope_name + '/lb' + str(lp).zfill(3) + '_' + str(r).zfill(
                    3) + '_train') as f:
                trainLabel = pk.load(f)
            with open('data/local/split/' + scope_name + '/lb' + str(lp).zfill(3) + '_' + str(r).zfill(
                    3) + '_test') as f:
                testLabel = pk.load(f)
            # Row-select by the split's sample indices (Python 2: .keys()
            # returns a list usable as an index array).
            XTrain = X[trainLabel.keys()]
            XTest = X[testLabel.keys()]
            # GaussianNB needs dense arrays; densify sparse matrices.
            if not isinstance(XTrain, np.ndarray):
                XTrain = XTrain.toarray()
                XTest = XTest.toarray()
            yTrain = y[trainLabel.keys()]
            yTest = y[testLabel.keys()]
            # train
            #clf = MultinomialNB()
            clf = GaussianNB()
            #clf = BernoulliNB()
            clf.fit(XTrain, yTrain)
            # test: fraction of correct predictions on this split
            pred = clf.predict(XTest)
            results.append(sum(pred == yTest) / float(yTest.shape[0]))
    return np.mean(results)
def MakeClassification(index, instancesData, classesData, instancesTest, type="proba", classifiersType="normal"):
    """Fit the index-th classifier on (instancesData, classesData) and
    return its predictions for instancesTest (Python 2 module).

    type: "proba" returns class probabilities, anything else hard labels.
    classifiersType: "ova" swaps in one-vs-all wrapped classifiers.
    Returns None (after printing an error) for an out-of-range index.
    NOTE(review): `type` shadows the builtin; kept for compatibility.
    """
    classifiers = [
        OneVsRestClassifier(sklearn.svm.SVC(probability=1), 4),
        DecisionTreeClassifier(random_state=0),
        KNeighborsClassifier(n_jobs=4),
        MLPClassifier(),
        sklearn.svm.SVC(probability=1, decision_function_shape="ovo"),
        OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0)
    ]
    if (classifiersType == "ova"):
        # One-vs-all variants (note: this list has 5 entries, one fewer
        # than the default list, so valid `index` values differ).
        classifiers = [
            OneVsRestClassifier(sklearn.svm.SVC(probability=1), 4),
            OneVsRestClassifier(DecisionTreeClassifier(random_state=0), 4),
            OneVsRestClassifier(KNeighborsClassifier(), 4),
            OneVsRestClassifier(MLPClassifier(), 4),
            OneVsRestClassifier(GaussianNB(), 4)
        ]
    if (index >= len(classifiers)):
        print "ERROR. The index is not valid."
        return None
    else:
        #print "Performing classification"
        if type == "proba":
            return classifiers[index].fit(instancesData, classesData).predict_proba(instancesTest)
        else:
            return classifiers[index].fit(instancesData, classesData).predict(instancesTest)
def __init__(self, training_data, training_target):
    """Store the training set and create the (unfitted) GaussianNB model;
    fitting happens elsewhere."""
    self.training_data = training_data
    self.training_target = training_target
    self.clf = GaussianNB()
def bayes_train(train_data, train_target):
model = GaussianNB()
model.fit(train_data, train_target)
expected = train_target
predicted = model.predict(train_data)
# summarize the fit of the model
print metrics.classification_report(expected, predicted)
print metrics.confusion_matrix(expected, predicted)
def NB(train_x, train_y, test_x, test_y):
    """Gaussian naive Bayes: fit on the training split, evaluate on the
    test split, and return the AUC. (Original docstring was mojibake.)"""
    model = GaussianNB()
    model.fit(train_x, train_y)
    # Probability of the positive class (column 1) drives the AUC.
    positive_probs = [row[1] for row in model.predict_proba(test_x)]
    predictions = model.predict(test_x)
    auc = evaluate_auc(positive_probs, test_y)
    evaluate(predictions, test_y)
    return auc