# naive_bayes.py -- Python source code snippet

def train_model(data, target):
    """
    Train a Bernoulli Naive Bayes classifier and evaluate it on a held-out split.

    The data is split into a training set and a validation set (40% of the
    samples are held out). A Bernoulli Naive Bayes classifier is instantiated
    and fitted on the training set, its predictions on the validation set are
    passed to ``evaluate_model``, and the fitted classifier is pickled to
    ``nb_classifier.pickle`` (relative to the current working directory).

    Args:
        data: Feature representation of the tweets, in a form accepted by
            ``train_test_split`` and ``BernoulliNB.fit``.
        target: Sentiment labels corresponding to the rows of ``data``.

    Returns:
        The fitted ``BernoulliNB`` classifier.
    """

    # Hold-out split: 40% of the samples are reserved for validation.
    # TO TRY: stratification for dividing preclassified tweets into homogeneous
    # subgroups before sampling, in order to improve the representativeness
    # of the sampling.
    (train_tweets,
     validation_tweets,
     train_sentiment,
     validation_sentiment) = cross_validation.train_test_split(data,
                                                               target,
                                                               test_size=0.4)

    # Fitting the Naive Bayes classifier with the training tweets and
    # corresponding sentiment
    classifier = BernoulliNB().fit(train_tweets, train_sentiment)

    predicted = classifier.predict(validation_tweets)

    # Using the validation split, evaluate the predictions against the
    # true sentiment labels
    evaluate_model(validation_sentiment, predicted)

    # Pickling the classifier. The context manager guarantees the file handle
    # is closed even if pickle.dump raises part-way through.
    with open('nb_classifier.pickle', 'wb') as pickle_file:
        pickle.dump(classifier, pickle_file)

    return classifier


################################################################################