def kfold_train(self, n_splits=3, pos_label='geography', random_state=None):
    """Cross-validate ``self.cls`` on ``self.data`` using shuffled K-fold splits.

    For each fold the classifier is trained on the training rows (``text``
    and ``class`` columns) and evaluated on the held-out rows. The fold's
    confusion matrix is added to ``self.confusion``; the per-fold F1,
    precision and recall are averaged and stored in ``self.score``,
    ``self.precision`` and ``self.recall``.

    Args:
        n_splits: Number of folds passed to ``KFold``.
        pos_label: Class value treated as the positive class by the F1,
            recall and precision metrics. Defaults to ``'geography'``.
        random_state: Seed forwarded to ``KFold`` for a reproducible
            shuffle. ``None`` keeps the unseeded shuffle.

    Returns:
        ``self.cls``; its most recent ``train`` call used the training rows
        of the final fold.
    """
    # NOTE(review): ``self.confusion`` is only accumulated into here, so it
    # is assumed to be initialised elsewhere with one row/column per class
    # -- confirm against the constructor.
    logger.info('train classifier using kFold')
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Fix the label set once, in sorted order (the order confusion_matrix
    # uses by default). Without it, a fold that lacks a class produces a
    # smaller matrix, which ``+=`` either rejects or silently broadcasts
    # into every cell of ``self.confusion``.
    labels = sorted(self.data['class'].unique())

    scores = []
    precisions = []
    recalls = []
    for train_index, test_index in kf.split(self.data):
        train_rows = self.data.iloc[train_index]
        test_rows = self.data.iloc[test_index]
        test_y = test_rows['class'].values

        self.cls.train(train_rows['text'].values, train_rows['class'].values)
        predictions = self.cls.predict(test_rows['text'].values)

        self.confusion += confusion_matrix(test_y, predictions, labels=labels)
        scores.append(f1_score(test_y, predictions, pos_label=pos_label))
        recalls.append(recall_score(test_y, predictions, pos_label=pos_label))
        precisions.append(
            precision_score(test_y, predictions, pos_label=pos_label)
        )

    # Unweighted mean over folds.
    self.score = sum(scores) / len(scores)
    self.precision = sum(precisions) / len(precisions)
    self.recall = sum(recalls) / len(recalls)
    return self.cls
评论列表
文章目录