def cross_validation():
    """Choose the number of neighbors for a KNN classifier and plot accuracy.

    For every k in 1..29 a ``KNeighborsClassifier(n_neighbors=k)`` is scored
    with ``cross_val_score`` (``cv=10``, ``scoring='accuracy'``) on the
    training split returned by ``load_data()``. The k with the lowest
    misclassification error (1 - mean accuracy) is printed; on ties the
    smallest such k wins. Finally the mean accuracy is plotted against k
    and the figure is shown via ``plt.show()``.

    Returns:
        None. Results are reported through stdout and the plot only.

    NOTE(review): ``load_data``, ``KNeighborsClassifier``, ``cross_val_score``
    and ``plt`` are defined/imported outside this block -- presumably
    scikit-learn and matplotlib.pyplot; confirm against the file's imports.
    """
    # load_data() yields four values; only the two training parts are used
    # for model selection, so the test parts are discarded here.
    x_train, _, y_train, _ = load_data()

    k_values = list(range(1, 30))
    mean_scores = []
    for k in k_values:
        knn = KNeighborsClassifier(n_neighbors=k)
        fold_scores = cross_val_score(
            knn, x_train, y_train, cv=10, scoring='accuracy')
        mean_scores.append(fold_scores.mean())

    # Misclassification error = 1 - accuracy. list.index() returns the first
    # match, so ties resolve to the smallest k.
    errors = [1 - score for score in mean_scores]
    optimal_k = k_values[errors.index(min(errors))]

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("The optimal number of neighbors is %d" % optimal_k)

    # Plot the accuracy curve (not the error curve) against k.
    plt.plot(k_values, mean_scores)
    plt.xlabel('Number of Neighbors K')
    plt.ylabel('correct classification rate')
    plt.show()
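# 评论列表 (comment list -- leftover page navigation text, not code)
# 文章目录 (article table of contents -- leftover page navigation text, not code)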