def bayes(self):
    self.mnb = MultinomialNB()
    self.y_train = self.y_train.astype('int')
    self.mnb.fit(self.x_trainvect, self.y_train)
Example source code for the Python class MultinomialNB()
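As an orientation before the collected snippets, here is a minimal, self-contained sketch of the typical MultinomialNB workflow: vectorize raw text into term counts, fit, and predict. The toy documents and labels are illustrative only.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

docs = ["cheap pills online", "project status meeting",
        "cheap online offer", "meeting notes attached"]
labels = [1, 0, 1, 0]  # 1 = spam, 0 = ham

vect = CountVectorizer()
X = vect.fit_transform(docs)  # sparse document-term count matrix

clf = MultinomialNB()
clf.fit(X, labels)
print(clf.predict(vect.transform(["cheap online pills"])))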
def fit(self, dataset, filename):
    self.logger.debug("fit")
    self.clf = Pipeline([('vect', CountVectorizer()),
                         ('tfidf', TfidfTransformer()),
                         ('clf', MultinomialNB())
                         ])
    self.clf.fit(dataset.get_dataset()['data'], dataset.get_dataset()['target'])
    joblib.dump(self.clf, filename + ".pkl", compress=9)
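A pipeline persisted like this can later be reloaded and applied directly to raw text. A minimal sketch, assuming the file written by fit exists; on recent scikit-learn installations joblib is its own top-level package rather than sklearn.externals.joblib.

import joblib

clf = joblib.load("model.pkl")  # hypothetical filename passed to fit
# The loaded pipeline re-applies the same vectorization and tf-idf steps.
print(clf.predict(["example document to classify"]))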
def train(self, datadict, labels=None):
    '''
    Runs the classifier training using the dictionary of label, features.
    @param datadict: dictionary of label, features
    @param labels: (optional) list of labels. If given, labels are taken in the order of this list.
    '''
    # Set labels from the data dict
    if labels is None:
        self.labels = datadict.keys()
    else:
        self.labels = labels
    # Train the GMM for BoF computation if it has not been trained yet
    if self.model.gmm is None:
        print('Model not trained yet.', file=sys.stderr)
        self.model.train(datadict, self.labels)
    print('Computing', self.model.__class__.__name__, '...', file=sys.stderr)
    # Parse dictionary into BoF representations and labels
    bofs, bofl = self._parse_dict(datadict, self.labels)
    # Create the multinomial naive Bayes classifier
    print('Training Multinomial Bayes ...', file=sys.stderr)
    self.bay = bayes.MultinomialNB(alpha=0.5, fit_prior=False)
    self.bay.fit(bofs, bofl)
def train_expert(action_context):
    logreg = OneVsRestClassifier(LogisticRegression())
    mnb = OneVsRestClassifier(MultinomialNB())
    # Column 1 holds the action label; columns 2 onward hold the context features.
    logreg.fit(action_context.iloc[:, 2:], action_context.iloc[:, 1])
    mnb.fit(action_context.iloc[:, 2:], action_context.iloc[:, 1])
    return [logreg, mnb]
def train_expert(history_context, history_action):
    n_round = len(history_context)
    history_context = np.array([history_context[t] for t in range(n_round)])
    history_action = np.array([history_action[t] for t in range(n_round)])
    logreg = OneVsRestClassifier(LogisticRegression())
    mnb = OneVsRestClassifier(MultinomialNB())
    logreg.fit(history_context, history_action)
    mnb.fit(history_context, history_action)
    return [logreg, mnb]
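A hedged usage sketch for the variant above, with synthetic data; Poisson counts keep the features non-negative, which MultinomialNB requires, and all names are illustrative:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB

rng = np.random.RandomState(0)
history_context = list(rng.poisson(3, size=(20, 5)))  # per-round context vectors
history_action = list(rng.randint(0, 3, size=20))     # per-round chosen actions

logreg, mnb = train_expert(history_context, history_action)
print(mnb.predict(np.array(history_context[:2])))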
def get_pipeline(sample_col, parallel_jobs=None):
    feat_ext_objs = [feat_ext_class(sample_col)
                     for feat_ext_class in get_objs(FEAT_EXTS_DIR, 'Worker')]
    feat_ext_tuples = [(feat_ext_obj.feature_name, feat_ext_obj)
                       for feat_ext_obj in feat_ext_objs]
    pipeline = Pipeline([
        ('features', FeatureUnion(feat_ext_tuples, n_jobs=parallel_jobs)),
        ('describe_data', describe_data.Transformer()),
        ('classifier', MultinomialNB()),
    ])
    return pipeline
twenty_news_group.py (project: DataScience-And-MachineLearning-Handbook-For-Coders, author: wxyyxc1992)
def train_classifier(self):
    """
    Train the classifier.
    """
    self.extract_feature()
    self.clf = MultinomialNB().fit(
        self.train_tfidf, self.data['train'].target)
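The class above comes from a 20 Newsgroups walkthrough; roughly the same flow as a standalone sketch (the category choice and variable names here are assumptions, not the project's code):

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

train = fetch_20newsgroups(subset='train',
                           categories=['sci.space', 'rec.autos'])
vect = TfidfVectorizer()
train_tfidf = vect.fit_transform(train.data)  # tf-idf feature extraction
clf = MultinomialNB().fit(train_tfidf, train.target)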
def NBModel(self, train_data, test_data, train_labels, test_labels):
    model = MultinomialNB(alpha=0.01)
    model.fit(train_data, train_labels)
    self.saveModel(model, 'NB')
    predict = model.predict(test_data)
    return metrics.accuracy_score(test_labels, predict)
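Outside the class, the same train-and-evaluate flow looks like this; a minimal sketch with illustrative toy data, keeping the light alpha=0.01 smoothing from NBModel:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

train_docs = ["free money now", "meeting at noon",
              "win a free prize", "lunch tomorrow"]
train_labels = [1, 0, 1, 0]
test_docs = ["free prize inside", "see you at lunch"]
test_labels = [1, 0]

vect = CountVectorizer()
train_data = vect.fit_transform(train_docs)
test_data = vect.transform(test_docs)  # reuse the fitted vocabulary

model = MultinomialNB(alpha=0.01)
model.fit(train_data, train_labels)
predict = model.predict(test_data)
print(metrics.accuracy_score(test_labels, predict))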
def _init_classifiers(self):
    # MultinomialNB and ComplementNB are scikit-learn classes; the negation,
    # universal-set, and selective variants are project-specific.
    mnb = MultinomialNB()
    cnb = ComplementNB()
    nnb = NegationNB()
    unb = UniversalSetNB()
    snb = SelectiveNB()
    return [mnb, cnb, nnb, unb, snb]
02_tuning.py (project: Building-Machine-Learning-Systems-With-Python-Second-Edition, author: PacktPublishing)
def create_ngram_model(params=None):
    tfidf_ngrams = TfidfVectorizer(ngram_range=(1, 3),
                                   analyzer="word", binary=False)
    clf = MultinomialNB()
    pipeline = Pipeline([('vect', tfidf_ngrams), ('clf', clf)])
    if params:
        pipeline.set_params(**params)
    return pipeline
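Since this version accepts a params dict and calls set_params, it is the tuning variant; a hedged sketch of wiring it into a grid search on a current scikit-learn (the step names vect and clf come from the pipeline above; the toy data is illustrative):

from sklearn.model_selection import GridSearchCV

docs = ["good movie", "bad movie", "great film", "awful film",
        "loved it", "hated it", "really fine", "truly terrible"]
labels = [1, 0, 1, 0, 1, 0, 1, 0]

param_grid = {
    'vect__ngram_range': [(1, 1), (1, 3)],
    'clf__alpha': [0.1, 1.0],
}
grid = GridSearchCV(create_ngram_model(), param_grid, cv=2)
grid.fit(docs, labels)
print(grid.best_params_, grid.best_score_)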
01_start.py (project: Building-Machine-Learning-Systems-With-Python-Second-Edition, author: PacktPublishing)
def create_ngram_model():
    tfidf_ngrams = TfidfVectorizer(ngram_range=(1, 3),
                                   analyzer="word", binary=False)
    clf = MultinomialNB()
    pipeline = Pipeline([('vect', tfidf_ngrams), ('clf', clf)])
    return pipeline
def make_classifier():
    pipeline = Pipeline([
        ('count_vectorizer', CountVectorizer(ngram_range=(1, 2))),
        ('classifier', MultinomialNB())
    ])
    return pipeline
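Because the pipeline starts with a vectorizer, it can be cross-validated directly on raw strings; a minimal sketch with illustrative data:

from sklearn.model_selection import cross_val_score

texts = ["spam spam spam", "hello old friend", "buy cheap spam",
         "lunch again today?", "cheap spam offer", "see you soon"]
labels = [1, 0, 1, 0, 1, 0]

scores = cross_val_score(make_classifier(), texts, labels, cv=2)
print(scores.mean())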
def generate_base_classification():
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
    models = [
        #(LinearSVC, params('C', 'loss')),
        #(NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),
        # we do in-place modification of what the method params returns in order to add
        # more loss functions that weren't defined in the method
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),
        # dict.update() returns None, so merge into a new dict instead of
        # passing the return value of update() as the parameter grid
        (KNeighborsClassifier, dict(params('n_neighbors', 'leaf_size', 'p'),
                                    algorithm=['auto', 'brute', 'kd_tree', 'ball_tree'])),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha')),
    ]
    return models
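A hedged sketch of consuming the returned (class, grid) pairs. The params helper is project-specific and not shown above, so a stand-in with small illustrative grids is defined here; it must live in the same module as generate_base_classification for the name to resolve.

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV

def params(*names):
    # Stand-in for the module's own `params` helper (assumed to map
    # hyperparameter names to candidate value lists).
    grids = {'criterion': ['gini', 'entropy'], 'min_samples_split': [2, 5],
             'min_samples_leaf': [1, 2], 'n_estimators': [50, 100],
             'C': [0.1, 1.0], 'penalty': ['l2'],
             'n_neighbors': [3, 5], 'leaf_size': [15, 30], 'p': [1, 2],
             'alpha': [0.5, 1.0]}
    return {name: grids[name] for name in names}

X, y = load_iris(return_X_y=True)  # non-negative features, valid for MultinomialNB
for model_cls, param_grid in generate_base_classification():
    search = GridSearchCV(model_cls(), param_grid, cv=3)
    search.fit(X, y)
    print(model_cls.__name__, search.best_score_)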
def makEnsemble(X, xlist, Y):
    # naive Bayes
    clf = MultinomialNB()
    clf.fit(xlist, Y)
    featureSelectModel.append(clf)
    # K nearest neighbours
    clf = KNeighborsClassifier()
    clf.fit(xlist, Y)
    featureSelectModel.append(clf)
    # logistic regression
    clf = LogisticRegression(C=1)
    clf.fit(xlist, Y)
    featureSelectModel.append(clf)
    # random forest
    clf = RandomForestClassifier(n_estimators=400)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)
    # extra trees
    clf = ExtraTreesClassifier(n_estimators=400)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)
    # decision tree (min_samples_split must be >= 2 in scikit-learn)
    clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2, random_state=0)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)
    # gradient boosting
    params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
              'learning_rate': 0.01}
    clf = GradientBoostingClassifier(**params)
    clf.fit(X, Y)
    wholeFeatureModel.append(clf)

def find(lst, elem):
    return [i for i, x in enumerate(lst) if x == elem]

#clf = MultinomialNB()
def naive_bayes_classifier(train_x, train_y):
    from sklearn.naive_bayes import MultinomialNB
    model = MultinomialNB(alpha=0.01)
    model.fit(train_x, train_y)
    return model
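A quick usage sketch with a hypothetical term-count matrix (MultinomialNB expects non-negative features such as counts):

import numpy as np

train_x = np.array([[2, 0, 1], [0, 3, 0], [1, 0, 2], [0, 2, 1]])  # toy counts
train_y = [0, 1, 0, 1]

model = naive_bayes_classifier(train_x, train_y)
print(model.predict(train_x[:1]))        # predicted class
print(model.predict_proba(train_x[:1]))  # per-class probabilities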
def __init__(self, **kwargs):
    self.estimator = mock.MagicMock(spec=MultinomialNB())
    Wrapper.__init__(self, self.estimator)
    MockVisualizer.__init__(self, **kwargs)
def test_real_data_set_viz(self):
    model = naive_bayes.MultinomialNB()
    data = datasets.load_iris()
    feature_names = [name.replace(' ', '_') for name in data.feature_names]
    df = pd.DataFrame(data.data, columns=feature_names)
    # .values replaces the long-deprecated DataFrame.as_matrix()
    X = df[['sepal_length_(cm)', 'sepal_width_(cm)']].values
    y = data.target
    visualizer = DecisionBoundariesVisualizer(model)
    visualizer.fit_draw_poof(X, y)
    self.assert_images_similar(visualizer)
def test_quick_method(self):
    model = naive_bayes.MultinomialNB()
    data = datasets.load_iris()
    feature_names = [name.replace(' ', '_') for name in data.feature_names]
    df = pd.DataFrame(data.data, columns=feature_names)
    # .values replaces the long-deprecated DataFrame.as_matrix()
    X = df[['sepal_length_(cm)', 'sepal_width_(cm)']].values
    y = data.target
    visualizer = decisionviz(model, X, y)