def cv_score(classifier, dataset, metric=accuracy_score, n_folds=10):
    """
    Calculate the K-fold cross-validation score of a classifier.

    For every stratified fold the classifier is cleared, trained on the
    training part of the dataset and then used to classify the held-out
    part.  The true and predicted labels of all folds are pooled, and
    ``metric`` is evaluated once over the pooled lists (not per fold).

    Args:
        classifier: Object providing ``clear()``, ``train(dataset)`` and
            ``classify(dataset)``.  ``train`` must return an iterable;
            ``classify`` must yield ``(idx, label, result)`` triples in
            which ``result[0][0]`` is the predicted label.
        dataset: Object providing ``get_labels()`` and supporting indexing
            with the train/test index collections produced by
            ``StratifiedKFold``.
        metric: Callable invoked as ``metric(true_labels, predicted_labels)``.
        n_folds: Number of folds, forwarded to ``StratifiedKFold``.

    Returns:
        Whatever ``metric`` returns for the pooled labels.
    """
    true_labels = []
    predicted_labels = []

    # NOTE(review): passing the labels positionally together with the
    # `n_folds` keyword, and iterating the object directly, looks like the
    # legacy scikit-learn `cross_validation` API -- confirm against the
    # import used by this file before upgrading scikit-learn.
    folds = StratifiedKFold(list(dataset.get_labels()), n_folds=n_folds)

    for train_idx, test_idx in folds:
        # Clear the classifier (calls the `clear` RPC) so that nothing
        # learned in a previous fold carries over into this one.
        classifier.clear()

        # Split the dataset into the train/test parts of this fold.
        train_ds = dataset[train_idx]
        test_ds = dataset[test_idx]

        # Train the classifier using the train dataset.  The yielded items
        # are not needed here; the iterable is drained only so that every
        # record is processed (presumably `train` is lazy -- TODO confirm).
        for _ in classifier.train(train_ds):
            pass

        # Test the classifier using the test dataset.
        for _, label, result in classifier.classify(test_ds):
            # Labels are already sorted by score in descending order, so the
            # first entry carries the label with the highest prediction
            # score.
            predicted_labels.append(result[0][0])
            true_labels.append(label)

    # Return the cross-validation score over the labels of all folds.
    return metric(true_labels, predicted_labels)
评论列表
文章目录