def sub_NB(train_x, train_y, test_x, test_y):
    """Fit a Gaussian Naive Bayes model and return the positive-class probability for each test row."""
    classifier = GaussianNB()
    classifier.fit(train_x, train_y)
    pred = classifier.predict_proba(test_x)
    predict_pro = []
    for pro in pred:
        predict_pro.append(pro[1])  # column 1 = P(class 1); test_y is unused, kept for a uniform signature
    return np.array(predict_pro)
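# A minimal usage sketch for sub_NB; the synthetic dataset below is purely
# illustrative (sub_NB itself needs numpy and GaussianNB in scope).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X, y = make_classification(n_samples=200, random_state=0)
train_x, test_x, train_y, test_y = train_test_split(X, y, random_state=0)
print(sub_NB(train_x, train_y, test_x, test_y)[:5])  # P(class 1) per test row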
def _init_model(self):
    return GaussianNB()
def generate_base_classification():
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

    # dict.update() returns None, so build the KNN grid before the list
    # rather than calling .update() inline inside the tuple.
    knn_params = params('n_neighbors', 'leaf_size', 'p')
    knn_params.update({'algorithm': ['auto', 'brute', 'kd_tree', 'ball_tree']})

    models = [
        #(LinearSVC, params('C', 'loss')),
        #(NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),
        # in-place modification of what params() returns, to add loss functions
        # it doesn't define; note list.extend() also returns None, so this line
        # would need the same treatment as the KNN grid above if re-enabled:
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),
        (KNeighborsClassifier, knn_params),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha'))
    ]
    return models
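# A hypothetical consumer of generate_base_classification(): each entry pairs
# an estimator class with a grid dict, assumed compatible with GridSearchCV
# (params() is a module-level helper not shown in this snippet).
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV

X, y = load_iris(return_X_y=True)
for Model, grid in generate_base_classification():
    search = GridSearchCV(Model(), grid or {}, cv=3)
    search.fit(X, y)
    print(Model.__name__, round(search.best_score_, 3))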
def __init__(self):
    SingleClassifier.SingleClassifier.__init__(self)
    # weak classifier
    self.clf = GaussianNB()
def __init__(self):
    self.learner = GaussianNB()
def get_naive_bayes(self):
    """get naive bayes algorithm"""
    return GaussianNB()
def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        # max_depth must be a single int, not a list; depths to sweep belong in self.params
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3)
    }
    # These are the parameters which will be run through
    self.params = {
        'RF': {'n_estimators': [1, 10, 100, 1000], 'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'LR': {'penalty': ['l1', 'l2'], 'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'random_state': [1]},
        'SGD': {'loss': ['log'], 'penalty': ['l2', 'l1', 'elasticnet'], 'random_state': [1]},
        'ET': {'n_estimators': [1, 10, 100, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [1, 3, 5, 10, 15], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1, 10, 100, 1000], 'random_state': [1]},
        'GB': {'n_estimators': [1, 10, 100, 1000], 'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5], 'subsample': [0.1, 0.5, 1.0], 'max_depth': [1, 3, 5, 10, 20, 50, 100], 'random_state': [1]},
        'NB': {},
        'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 15, 20, 30, 40, 50], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'kernel': ['linear'], 'random_state': [1]},
        'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100], 'weights': ['uniform', 'distance'], 'algorithm': ['auto', 'ball_tree', 'kd_tree']}
    }
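# A hedged sketch of consuming self.clfs / self.params: ParameterGrid expands
# each grid (an empty dict yields one default config, as for 'NB'), and clone()
# keeps the prototype estimator untouched. Method name and placement are
# assumptions, not part of the original class.
from sklearn.base import clone
from sklearn.model_selection import ParameterGrid

def iter_configured_clfs(self):
    for name, proto in self.clfs.items():
        for p in ParameterGrid(self.params.get(name, {})):
            yield name, clone(proto).set_params(**p)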
def runner(i):
    sem.acquire()
    print("learn begin %s" % i)
    clf = ensemble.BaggingClassifier(naive_bayes.GaussianNB())
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    # print("label %s done\n%s"
    #       % (i, metrics.classification_report(testlabel[i], result[i])))
    # print(metrics.confusion_matrix(testlabel[i], result))
    sem.release()
def runner(i):
    sem.acquire()
    print("learn begin %s" % i)
    clf = ensemble.AdaBoostClassifier(naive_bayes.GaussianNB())
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    # print("label %s done\n%s"
    #       % (i, metrics.classification_report(testlabel[i], result[i])))
    # print(metrics.confusion_matrix(testlabel[i], result))
    sem.release()
def runner(i):
    sem.acquire()
    print("learn begin %s" % i)
    clf = naive_bayes.GaussianNB()
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict(testdata)
    dbresult[i] = clf.predict(dbdata)
    print("label %s done\n%s"
          % (i, metrics.classification_report(testlabel[i], result[i])))
    # print(metrics.confusion_matrix(testlabel[i], result))
    sem.release()
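# A hedged driver sketch for the runner() variants above: they rely on a shared
# semaphore plus global result containers, so a plausible launcher looks like
# this (the concurrency bound of 4 is an assumption).
import threading

sem = threading.Semaphore(4)          # bounds concurrent fits (value assumed)
svms, result, dbresult = [], {}, {}   # shared state mutated by runner()
n_labels = len(trainlabel)            # one runner per label column (assumed)

threads = [threading.Thread(target=runner, args=(i,)) for i in range(n_labels)]
for t in threads:
    t.start()
for t in threads:
    t.join()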
def test_gaussiannb():
    iris = load_iris()
    clf = GaussianNB()
    clf.fit(iris.data, iris.target)
    y_pred = clf.predict(iris.data)
    print(y_pred)
    clf_ = SKGaussianNB()
    clf_.fit(iris.data, iris.target)
    print(clf_.predict(iris.data))
    print(iris.target)
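# A hedged companion check: if the custom GaussianNB is meant to track
# scikit-learn's SKGaussianNB, the two should agree on nearly every training
# sample. The 0.99 threshold is an assumption; tie-breaking may differ.
def test_gaussiannb_agreement():
    iris = load_iris()
    ours, theirs = GaussianNB(), SKGaussianNB()
    ours.fit(iris.data, iris.target)
    theirs.fit(iris.data, iris.target)
    agreement = (ours.predict(iris.data) == theirs.predict(iris.data)).mean()
    assert agreement > 0.99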
def __init__(self, genres, data, type='knn', name='', clf_kwargs=None):
    self.logger = get_logger('classifier')
    self.display_name = name

    self.genres = genres
    self.m_genres = {genre: i for i, genre in enumerate(genres)}
    self.randstate = np.random.RandomState()
    self.scaler = StandardScaler()

    clf_kwargs = {} if not clf_kwargs else clf_kwargs
    if type in ['svm', 'mlp']:
        clf_kwargs['random_state'] = self.randstate

    if type == 'knn':
        self.proto_clf = KNeighborsClassifier(**clf_kwargs)
    elif type == 'svm':
        self.proto_clf = SVC(**clf_kwargs)
    elif type == 'dtree':
        self.proto_clf = DecisionTreeClassifier(**clf_kwargs)
    elif type == 'gnb':
        self.proto_clf = GaussianNB(**clf_kwargs)
    elif type == 'perc':
        self.proto_clf = Perceptron(**clf_kwargs)
    elif type == 'mlp':
        self.proto_clf = MLPClassifier(**clf_kwargs)
    elif type == 'ada':
        self.proto_clf = AdaBoostClassifier(**clf_kwargs)
    else:
        raise LookupError('Classifier type "{}" is invalid'.format(type))

    self._convert_data(data)

    self.logger.info('Classifier: {} (params={})'.format(
        self.proto_clf.__class__.__name__,
        clf_kwargs
    ))
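# Hypothetical instantiation of the enclosing class (its name is not shown in
# the snippet, so `Classifier` and the `data` payload are assumptions; data
# must match whatever _convert_data expects):
genres = ['rock', 'jazz', 'classical']
model = Classifier(genres, data, type='gnb', name='gnb-baseline')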
def getModels():
    # a list literal is simpler than repeated append() calls
    return [
        "LinearRegression",
        "BayesianRidge",
        "ARDRegression",
        "ElasticNet",
        "HuberRegressor",
        "Lasso",
        "LassoLars",
        "Ridge",  # fixed typo: was "Rigid"
        "SGDRegressor",
        "SVR",
        "MLPClassifier",
        "KNeighborsClassifier",
        "SVC",
        "GaussianProcessClassifier",
        "DecisionTreeClassifier",
        "RandomForestClassifier",
        "AdaBoostClassifier",
        "GaussianNB",
        "LogisticRegression",
        "QuadraticDiscriminantAnalysis",
    ]
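# A hedged companion to getModels(): one plausible way to resolve a name string
# into an estimator instance. The registry below is an assumption (only a few
# entries shown), not an API of the original module.
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.naive_bayes import GaussianNB

def make_model(name):
    registry = {
        'LinearRegression': LinearRegression,
        'Ridge': Ridge,
        'GaussianNB': GaussianNB,
    }
    return registry[name]()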
def test_AdaBoostClassifier_base_classifier(*data):
    '''
    Test the AdaBoost classifier with different numbers of weak learners
    and different base classifier types.
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    from sklearn.naive_bayes import GaussianNB
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)

    clf = ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train, y_train)
    ## graph
    estimators_num = len(clf.estimators_)
    X = range(1, estimators_num + 1)
    ax.plot(list(X), list(clf.staged_score(X_train, y_train)), label="Training score")
    ax.plot(list(X), list(clf.staged_score(X_test, y_test)), label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1)
    ax.set_title("AdaBoostClassifier with Decision Tree")

    ax = fig.add_subplot(2, 1, 2)
    clf = ensemble.AdaBoostClassifier(learning_rate=0.1, base_estimator=GaussianNB())
    clf.fit(X_train, y_train)
    ## graph
    estimators_num = len(clf.estimators_)
    X = range(1, estimators_num + 1)
    ax.plot(list(X), list(clf.staged_score(X_train, y_train)), label="Training score")
    ax.plot(list(X), list(clf.staged_score(X_test, y_test)), label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1)
    ax.set_title("AdaBoostClassifier with Gaussian Naive Bayes")
    plt.show()
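# A minimal driver sketch; the digits dataset and split are illustrative and
# assume the module already imports matplotlib.pyplot as plt and sklearn.ensemble.
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
test_AdaBoostClassifier_base_classifier(*train_test_split(X, y, test_size=0.25, random_state=0))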
def script_run():
    # build the keyword list
    kw_list = build_key_word("train.txt")
    # save the new words
    fp = open("new_word.txt", encoding="utf-8", mode="w")
    for word in kw_list:
        fp.write(word + "\n")
    fp.close()
    # kw_list = load_key_words("word.txt")
    feature, label = get_feature("train.txt", kw_list)
    gnb = GaussianNB()
    gnb = gnb.fit(feature, label)
    joblib.dump(gnb, 'model/gnb.model')
    print("training complete")  # original message was garbled; intent reconstructed
    # print(feature, label)
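# A hedged sketch of the inference side: reload the dumped model and score a
# held-out file ("test.txt" is a hypothetical path; build_key_word and
# get_feature are the module's own helpers, reused as-is).
gnb = joblib.load('model/gnb.model')
kw_list = build_key_word("train.txt")
feature, label = get_feature("test.txt", kw_list)
print(gnb.score(feature, label))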
def GaussianNBLocalModel(localTrainFeature, localTestFeature, localTrainLabel, config):
    print('train...')
    model = GaussianNB()
    model.fit(X=localTrainFeature.toarray(), y=localTrainLabel)
    print('predict...')
    if config['prob'] == False:
        return model.predict(localTestFeature.toarray())
    else:
        return model.predict_log_proba(localTestFeature.toarray())
#-- Gaussian Naive Bayes online predict model frame
def sk_demo_1():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    Y = np.array([1, 1, 1, 2, 2, 2])
    clf = GaussianNB()
    clf.fit(X, Y)
    test_item = np.array([[-0.8, -1]])
    print(clf.predict(test_item))
    # [1]
    print(clf.get_params())
def sk_nb_diabetes():
    x_train, x_test, y_train, y_test = load_diabetes_data()
    clf = GaussianNB()
    # the original snippet stopped here; fitting and scoring complete the flow
    clf.fit(x_train, y_train)
    print(clf.score(x_test, y_test))
def test_majority_label_iris():
    """Check classification by majority label on the iris dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='hard')
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.95, decimal=2)
def test_weights_iris():
    """Check classification by weighted average probabilities on the iris dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        weights=[1, 2, 10])
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.93, decimal=2)
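# A hedged sketch of what voting='soft' with weights=[1, 2, 10] computes under
# the hood: the predicted class is the argmax of the weighted average of each
# estimator's predict_proba output.
import numpy as np

def soft_vote(probas, weights):
    # probas: list of (n_samples, n_classes) arrays, one per fitted estimator
    avg = np.average(np.stack(probas), axis=0, weights=weights)
    return np.argmax(avg, axis=1)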