def train_model(data, target):
    """
    Train a Bernoulli Naive Bayes classifier and persist it to disk.

    Splits ``data`` and ``target`` into a training portion and a held-out
    validation portion (40% held out), fits a ``BernoulliNB`` classifier on
    the training portion, hands the validation labels and the classifier's
    predictions to ``evaluate_model``, and finally pickles the fitted
    classifier to ``nb_classifier.pickle`` (relative to the current working
    directory, overwriting any existing file of that name).

    Args:
        data: Feature representation of the tweets, one entry per tweet
            (presumably a feature matrix -- confirm against the caller).
        target: Sentiment labels aligned with ``data``.

    Returns:
        The fitted ``BernoulliNB`` classifier.
    """
    # Single hold-out split (not k-fold cross-validation): 60% train, 40% validation.
    # TODO: try stratification, i.e. dividing the pre-classified tweets into
    # homogeneous subgroups before sampling, to improve the
    # representativeness of the split.
    train_tweets, validation_tweets, train_sentiment, validation_sentiment = (
        cross_validation.train_test_split(data, target, test_size=0.4)
    )

    # Fit the Naive Bayes classifier with the training tweets and their sentiment.
    classifier = BernoulliNB().fit(train_tweets, train_sentiment)
    predicted = classifier.predict(validation_tweets)

    # Evaluate the predictions against the held-out validation labels.
    evaluate_model(validation_sentiment, predicted)

    # Pickle the classifier. The context manager guarantees the file handle is
    # closed even if pickle.dump raises part-way through.
    with open('nb_classifier.pickle', 'wb') as pickle_file:
        pickle.dump(classifier, pickle_file)

    return classifier
################################################################################
评论列表
文章目录