def k_fold_classification(x, y, folds, classifier_name='logistic_regression', bootstrap=False):
    """Fit and score a classifier over ``folds`` rounds and export the mean accuracy.

    For every round a train/test split is selected, both sets are passed
    through ``scale_sets``, a model is obtained from ``model_fitting`` and
    its predictions on the test set are scored with
    ``metrics.accuracy_score``. Each round's accuracy is printed; the
    average over all rounds is stored under the ``"accuracy"`` key of a
    dict that is handed to ``export_model_performance``.

    Args:
        x: Feature data, forwarded unchanged to the sampling helpers.
        y: Labels matching ``x``, forwarded unchanged to the sampling helpers.
        folds: Number of rounds to run (used as the ``range`` bound, so it
            must be an integer). Also forwarded to the sampling helpers.
        classifier_name: Identifier passed to ``scale_sets`` and
            ``model_fitting`` to select the classifier.
        bootstrap: When true, each round draws its split from
            ``random_sample_data_set(x, y, folds)`` instead of using the
            j-th precomputed k-fold split.

    Returns:
        None. The result is delivered through ``export_model_performance``.

    Raises:
        ZeroDivisionError: If ``folds`` is 0 (assuming the sampling helper
            itself accepts 0), because the mean divides by ``folds``.
    """
    # NOTE: the k-fold partition is built even when ``bootstrap`` is true, in
    # which case it is never read below. The call is kept so that any side
    # effects of the helper stay exactly as before.
    x_train_list, y_train_list, x_test_list, y_test_list = k_fold_sample_data_set(x, y, folds)
    model_performance_dict = {}
    total_accuracy = 0
    for j in range(folds):
        # Select the train and test sets for this round.
        if bootstrap:
            x_train, y_train, x_test, y_test = random_sample_data_set(x, y, folds)
        else:
            x_train = x_train_list[j]
            y_train = y_train_list[j]
            x_test = x_test_list[j]
            y_test = y_test_list[j]
        x_train, x_test = scale_sets(x_train, x_test, classifier_name)
        model = model_fitting(x_train, y_train, classifier_name)
        predicted_labels = model.predict(x_test)
        # Score once and reuse the value for both reporting and averaging.
        fold_accuracy = metrics.accuracy_score(y_test, predicted_labels)
        print(fold_accuracy)
        total_accuracy += fold_accuracy
    # Explicit float conversion keeps true division regardless of interpreter.
    model_performance_dict["accuracy"] = float(total_accuracy) / float(folds)
    export_model_performance(model_performance_dict)
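# Comment list        (web-page navigation residue; not part of the program)
# Table of contents   (web-page navigation residue; not part of the program)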