def train_model(documents, labels, sample_size=.3, verbose=True):
    """Build a training set from labelled documents and train a classifier.

    For each document the items are joined with single spaces, the joined
    text is passed through ``word_tokenize``, and the resulting tokens are
    handed to ``resample`` together with the document's label and
    ``sample_size``. Everything ``resample`` yields is collected into one
    list, which is then fed to ``NaiveBayesClassifier.train``.

    Args:
        documents: Iterable of documents; each document is an iterable of
            strings (it is joined with ``' '.join``).
        labels: Positionally indexable collection; ``labels[i]`` is used as
            the label for the i-th document.
        sample_size: Forwarded unchanged to ``resample``.
        verbose: When true, progress messages are printed to stdout.

    Returns:
        Whatever ``NaiveBayesClassifier.train`` returns for the collected
        training examples.
    """

    def report(message, **print_options):
        # Progress output is optional; every message is flushed right away
        # and the caller decides the line ending.
        if verbose:
            print(message, flush=True, **print_options)

    report('starting to generate training data...', end='')

    training_examples = []
    for index, document in enumerate(documents):
        tokens = word_tokenize(' '.join(document))
        # Labels are matched to documents by position.
        training_examples.extend(resample(tokens, labels[index], sample_size))

    report('done')
    report('training model...this may take a few minutes.', end='')

    classifier = NaiveBayesClassifier.train(iter(training_examples))

    report('done')
    return classifier
评论列表
文章目录