Example source code for the Python class MultinomialNB() (collected snippets)

base.py 文件源码 项目:SofPythonBot 作者: UtkucanBykl 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def bayes(self):
        """Fit a multinomial naive-Bayes model on the vectorized training data.

        Casts ``self.y_train`` to int labels, then stores the fitted
        classifier on ``self.mnb``.
        """
        classifier = MultinomialNB()
        self.mnb = classifier
        self.y_train = self.y_train.astype('int')
        self.mnb.fit(self.x_trainvect, self.y_train)
classifier_bayesian.py 文件源码 项目:text-classification 作者: cahya-wirawan 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def fit(self, dataset, filename):
        """Train a counts -> tf-idf -> MultinomialNB pipeline and persist it.

        The fitted pipeline is stored on ``self.clf`` and dumped to
        ``filename + ".pkl"`` with maximum compression.
        """
        self.logger.debug("fit")
        steps = [
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', MultinomialNB()),
        ]
        self.clf = Pipeline(steps)
        self.clf.fit(dataset.get_dataset()['data'],
                     dataset.get_dataset()['target'])
        joblib.dump(self.clf, filename + ".pkl", compress=9)
classifiers.py 文件源码 项目:bof-aed 作者: rgrzeszi 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def train(self, datadict, labels=None):
        '''
        Run the classifier training using the dictionary of label -> features.
        Trains the underlying GMM first if it has not been fitted yet, then
        fits a multinomial naive-Bayes model on the bag-of-features (BoF)
        representations. (Python 2 code: uses print-to-stderr statements.)
        @param datadict: dictionary of label, features
        @param labels: (optional) list of labels. If given the order of labels is used from this list.
        '''

        # Set labels from data dict
        if labels is None:
            self.labels = datadict.keys()
        else:
            self.labels = labels
        # Train the GMM for BoF computation
        if self.model.gmm is None:
            # GMM not fitted yet, so fit it on this data before computing BoFs.
            print >> sys.stderr, 'Model not trained yet.'
            self.model.train(datadict, self.labels)

        print >> sys.stderr,'Computing',self.model.__class__.__name__,'...'
        # Parse dictionary into BoF representations and labels
        bofs, bofl = self._parse_dict(datadict, self.labels)

        #Create Multinomial Bayes
        # alpha=0.5 applies Lidstone smoothing; fit_prior=False uses uniform class priors.
        print >> sys.stderr,'Training Multinomial Bayes ...'
        self.bay = bayes.MultinomialNB(alpha=0.5, fit_prior=False)
        self.bay.fit(bofs, bofl)
        return
movielens_bandit.py 文件源码 项目:striatum 作者: ntucllab 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def train_expert(action_context):
    """Train two one-vs-rest experts on the action-context frame.

    Columns 2+ are used as features and column 1 as the target. Returns the
    fitted [logistic-regression, multinomial-NB] experts in that order.
    """
    features = action_context.iloc[:, 2:]
    targets = action_context.iloc[:, 1]
    experts = [
        OneVsRestClassifier(LogisticRegression()),
        OneVsRestClassifier(MultinomialNB()),
    ]
    for expert in experts:
        expert.fit(features, targets)
    return experts
simulation_exp4p.py 文件源码 项目:striatum 作者: ntucllab 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def train_expert(history_context, history_action):
    """Train one-vs-rest experts on the logged context/action history.

    Returns the fitted [logistic-regression, multinomial-NB] experts,
    in that order.
    """
    rounds = range(len(history_context))
    contexts = np.array([history_context[t] for t in rounds])
    actions = np.array([history_action[t] for t in rounds])
    experts = [
        OneVsRestClassifier(LogisticRegression()),
        OneVsRestClassifier(MultinomialNB()),
    ]
    for expert in experts:
        expert.fit(contexts, actions)
    return experts
model_pipeline.py 文件源码 项目:Guess-Genre-By-Lyrics 作者: ormatt 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def get_pipeline(sample_col, parallel_jobs=None):
    """Build the classification pipeline: feature union -> describe -> NB.

    Instantiates every 'Worker' feature extractor found in FEAT_EXTS_DIR on
    *sample_col*, unions them (optionally in parallel via *parallel_jobs*),
    and finishes with a MultinomialNB classifier.
    """
    workers = [worker_cls(sample_col)
               for worker_cls in get_objs(FEAT_EXTS_DIR, 'Worker')]
    named_features = [(worker.feature_name, worker) for worker in workers]
    union = FeatureUnion(named_features, n_jobs=parallel_jobs)
    return Pipeline([
        ('features', union),
        ('describe_data', describe_data.Transformer()),
        ('classifier', MultinomialNB()),
    ])
twenty_news_group.py 文件源码 项目:DataScience-And-MachineLearning-Handbook-For-Coders 作者: wxyyxc1992 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def train_classifier(self):
        """Extract tf-idf features from the training split and fit the classifier.

        Stores the fitted MultinomialNB model on ``self.clf``.
        """
        self.extract_feature()
        targets = self.data['train'].target
        self.clf = MultinomialNB().fit(self.train_tfidf, targets)
model.py 文件源码 项目:android_malware_detection 作者: congyuandong 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def NBModel(self, train_data, test_data, train_labels, test_labels):
        """Train a lightly-smoothed multinomial NB model and score it.

        The fitted model is saved under the name 'NB'; the return value is
        the accuracy on the held-out test split.
        """
        nb_model = MultinomialNB(alpha=0.01)
        nb_model.fit(train_data, train_labels)
        self.saveModel(nb_model, 'NB')
        predictions = nb_model.predict(test_data)
        return metrics.accuracy_score(test_labels, predictions)
bayes.py 文件源码 项目:opentc 作者: cahya-wirawan 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def fit(self, dataset, filename):
        """Fit a bag-of-words/tf-idf/naive-Bayes pipeline and save it to disk.

        The trained pipeline is kept on ``self.clf`` and serialized to
        ``filename + ".pkl"`` (compress level 9).
        """
        self.logger.debug("fit")
        self.clf = Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', MultinomialNB()),
        ])
        self.clf.fit(dataset.get_dataset()['data'],
                     dataset.get_dataset()['target'])
        joblib.dump(self.clf, filename + ".pkl", compress=9)
benchmark.py 文件源码 项目:Bayes 作者: krzjoa 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def _init_classifiers(self):
        """Instantiate the naive-Bayes variants benchmarked by this suite.

        Returns them in a fixed order: multinomial, complement, negation,
        universal-set, selective.
        """
        return [
            MultinomialNB(),
            ComplementNB(),
            NegationNB(),
            UniversalSetNB(),
            SelectiveNB(),
        ]
02_tuning.py 文件源码 项目:Building-Machine-Learning-Systems-With-Python-Second-Edition 作者: PacktPublishing 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def create_ngram_model(params=None):
    """Build a word 1-3-gram tf-idf + MultinomialNB pipeline.

    If *params* is a non-empty mapping it is applied to the pipeline via
    ``set_params`` before returning.
    """
    vectorizer = TfidfVectorizer(ngram_range=(1, 3),
                                 analyzer="word", binary=False)
    model = Pipeline([('vect', vectorizer), ('clf', MultinomialNB())])
    if params:
        model.set_params(**params)
    return model
01_start.py 文件源码 项目:Building-Machine-Learning-Systems-With-Python-Second-Edition 作者: PacktPublishing 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def create_ngram_model():
    """Return an untrained word 1-3-gram tf-idf + MultinomialNB pipeline."""
    steps = [
        ('vect', TfidfVectorizer(ngram_range=(1, 3),
                                 analyzer="word", binary=False)),
        ('clf', MultinomialNB()),
    ]
    return Pipeline(steps)
classifier_train.py 文件源码 项目:pygameweb 作者: pygame 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def make_classifier():
    """Return an untrained unigram/bigram counts + multinomial NB pipeline."""
    steps = [
        ('count_vectorizer', CountVectorizer(ngram_range=(1, 2))),
        ('classifier', MultinomialNB()),
    ]
    return Pipeline(steps)
tbs_ml.py 文件源码 项目:eezzy 作者: 3Blades 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def generate_base_classification():
    """Return (estimator_class, hyperparameter_grid) pairs for base classifiers.

    Each entry pairs a scikit-learn classifier class with the search space
    produced by the module-level ``params`` helper. Entries that proved
    unhelpful in practice are kept commented out for reference.
    """
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

    # BUG FIX: dict.update() returns None, so chaining it inline
    # (params(...).update({...})) put None in the models list instead of the
    # grid. Build the KNN grid in two explicit steps instead.
    knn_params = params('n_neighbors', 'leaf_size', 'p')
    knn_params.update({
        'algorithm': ['auto', 'brute', 'kd_tree', 'ball_tree']
    })

    models = [
        #(LinearSVC, params('C', 'loss')),
#         (NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),
        # we do in-place modification of what the method params return in order to add
        # more loss functions that weren't defined in the method
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),
        (KNeighborsClassifier, knn_params),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha'))
    ]

    return models
actual.py 文件源码 项目:AnswerClassify 作者: kenluck2001 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def makEnsemble( X, xlist, Y ):
    """Fit the two ensemble model banks on the training data.

    Models in the first bank are fitted on the selected-feature matrix
    *xlist* and appended to the module-level ``featureSelectModel`` list;
    models in the second bank are fitted on the full feature matrix *X* and
    appended to ``wholeFeatureModel``. *Y* is the common target vector.
    """
    # Bank 1: naive Bayes, K nearest neighbours, logistic regression
    # trained on the selected features.
    for model in (MultinomialNB(),
                  KNeighborsClassifier(),
                  LogisticRegression(C=1)):
        model.fit(xlist, Y)
        featureSelectModel.append(model)

    # Bank 2: tree ensembles trained on the whole feature set.
    boosting_params = {'n_estimators': 500, 'max_depth': 4,
                       'min_samples_split': 1, 'learning_rate': 0.01}
    for model in (RandomForestClassifier(n_estimators=400),
                  ExtraTreesClassifier(n_estimators=400),
                  DecisionTreeClassifier(max_depth=None, min_samples_split=1,
                                         random_state=0),
                  GradientBoostingClassifier(**boosting_params)):
        model.fit(X, Y)
        wholeFeatureModel.append(model)
doc.py 文件源码 项目:AnswerClassify 作者: kenluck2001 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def find(lst, elem):
    """Return the indices of every occurrence of *elem* in *lst*, in order."""
    matches = []
    for index, value in enumerate(lst):
        if value == elem:
            matches.append(index)
    return matches


#clf = MultinomialNB()
TextClf.py 文件源码 项目:Chinese_text_classifier 作者: swordLong 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def naive_bayes_classifier(train_x, train_y):
    """Fit and return a multinomial naive-Bayes model (alpha=0.01 smoothing)."""
    from sklearn.naive_bayes import MultinomialNB
    classifier = MultinomialNB(alpha=0.01)
    # MultinomialNB.fit returns the fitted estimator itself.
    return classifier.fit(train_x, train_y)


# KNN Classifier
test_wrapper.py 文件源码 项目:yellowbrick 作者: DistrictDataLabs 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def __init__(self, **kwargs):
        """Create a MagicMock estimator spec'd on MultinomialNB and delegate
        initialization to the Wrapper and MockVisualizer parents."""
        spec_estimator = MultinomialNB()
        self.estimator = mock.MagicMock(spec=spec_estimator)

        Wrapper.__init__(self, self.estimator)
        MockVisualizer.__init__(self, **kwargs)
test_boundaries.py 文件源码 项目:yellowbrick 作者: DistrictDataLabs 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_real_data_set_viz(self):
        """Render the decision-boundaries visualizer on two iris features
        and compare against the baseline image."""
        model = naive_bayes.MultinomialNB()

        data = datasets.load_iris()
        feature_names = [name.replace(' ', '_') for name in data.feature_names]
        df = pd.DataFrame(data.data, columns=feature_names)
        # DataFrame.as_matrix() was deprecated and removed in pandas 1.0;
        # .values is the supported equivalent.
        X = df[['sepal_length_(cm)', 'sepal_width_(cm)']].values
        y = data.target

        visualizer = DecisionBoundariesVisualizer(model)
        visualizer.fit_draw_poof(X, y)
        self.assert_images_similar(visualizer)
test_boundaries.py 文件源码 项目:yellowbrick 作者: DistrictDataLabs 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def test_quick_method(self):
        """Exercise the decisionviz quick method on two iris features."""
        model = naive_bayes.MultinomialNB()

        data = datasets.load_iris()
        feature_names = [name.replace(' ', '_') for name in data.feature_names]
        df = pd.DataFrame(data.data, columns=feature_names)
        # DataFrame.as_matrix() was deprecated and removed in pandas 1.0;
        # .values is the supported equivalent.
        X = df[['sepal_length_(cm)', 'sepal_width_(cm)']].values
        y = data.target

        visualizer = decisionviz(model, X, y)


问题


面经


文章

微信
公众号

扫码关注公众号