def classification(lead):
    """Train a k-nearest-neighbours model and classify a single lead.

    The dataset is loaded through ``get_dataset_input_from_database`` (called
    with the keys of ``lead``) and ``get_dataset_output_from_database``, split
    70/30 into training and test sets with a fixed random seed, and used to
    fit a ``KNeighborsClassifier`` with 7 neighbours and ``p=2``. Progress,
    the test-set score and the prediction are printed to stdout.

    Note that the model is re-trained from the database on every call.

    Args:
        lead: Mapping describing the lead to classify. Its keys are passed to
            the dataset loader and the whole mapping is converted with
            ``convert_dict_to_tuple`` before prediction.

    Returns:
        A dict with two string entries: ``'value'``, the predicted label, and
        ``'proba'``, the highest class probability reported for the lead.
    """
    inputs = get_dataset_input_from_database(lead.keys())
    outputs = get_dataset_output_from_database()
    print('The total number of examples in the dataset is: %d' % (len(inputs)))

    # Fixed random_state keeps the split (and therefore the score) repeatable.
    inputs_training, inputs_test, outputs_training, outputs_test = train_test_split(
        inputs, outputs, test_size=0.3, random_state=42
    )
    print('The number of examples used for training are: %d' % (len(inputs_training)))
    print('The number of examples used for testing are: %d' % (len(inputs_test)))

    knn = KNeighborsClassifier(n_neighbors=7, p=2)
    # np.ravel flattens the targets to the 1-D shape that fit() expects.
    knn.fit(inputs_training, np.ravel(outputs_training))
    print(
        '[K=7] The probability of the algorithm to be right is: %f%%'
        % (knn.score(inputs_test, outputs_test) * 100)
    )

    print('Lead data:')
    print(lead)
    data_to_predict = convert_dict_to_tuple(lead)
    print('Lead data to predict:')
    print(data_to_predict)

    # NOTE(review): predict()/predict_proba() are indexed with [0] below, so
    # convert_dict_to_tuple presumably yields a single-sample 2-D structure
    # whose feature order matches the training inputs - confirm in the helper.
    lead_status_value = knn.predict(data_to_predict)[0]
    print('According to lead data, his status is: %d' % (lead_status_value))
    print('[0] unqualified [1] qualified')

    # Report only the probability of the most likely class for this lead.
    max_proba = max(knn.predict_proba(data_to_predict)[0])
    print('Proba is: %d%%' % (max_proba * 100))

    return {'value': str(lead_status_value), 'proba': str(max_proba)}
评论列表
文章目录