def run(self):
    """Fit the configured model, score the test set and bundle the results.

    Training data is read from ``self.get_training_data()`` and test data
    from ``self.get_test_data()``; each is unpacked as ``(x, y, ids)``.
    The estimator is built by
    ``self.define_model(self.model_name, self.model_params)`` and fitted on
    the training split.

    The per-sample score stored under ``'prob_prediction_test_y'`` is:

    * ``clf.decision_function(test_x)`` when the model is ``"linear.SVC"``,
      when it is an ``"SGDClassifier"`` whose loss is ``"hinge"`` or
      ``"perceptron"``, or when the fitted estimator exposes no
      ``predict_proba`` at all;
    * column 1 of ``clf.predict_proba(test_x)`` otherwise (presumably the
      positive class of a binary problem -- confirm against the labels).

    Returns:
        tuple: ``(result_dictionary, clf)`` where ``result_dictionary``
        holds the predictions, scores, ids, model/config metadata and
        feature importances, and ``clf`` is the fitted estimator.
    """
    training_x, training_y, training_ids = self.get_training_data()
    test_x, test_y, test_ids = self.get_test_data()

    clf = self.define_model(self.model_name, self.model_params)
    clf.fit(training_x, training_y)
    res_predict = clf.predict(test_x)

    # Margin-based models have no calibrated probabilities. Besides the two
    # explicitly named cases, any estimator without ``predict_proba`` (for
    # example SGDClassifier with a loss other than log loss / modified Huber)
    # falls back to ``decision_function`` instead of raising AttributeError.
    use_decision_function = (
        (self.model_name == "SGDClassifier"
         and clf.loss in ("hinge", "perceptron"))
        or self.model_name == "linear.SVC"
        or not hasattr(clf, "predict_proba")
    )
    if use_decision_function:
        res = list(clf.decision_function(test_x))
    else:
        # Keep only the second column of the probability matrix.
        res = list(clf.predict_proba(test_x)[:, 1])

    # NOTE: the key spelling 'columned_used_for_feat_importance' is kept
    # as-is because downstream readers of this dictionary may rely on it.
    result_dictionary = {
        'training_ids': training_ids,
        'predictions_test_y': list(res_predict),
        'prob_prediction_test_y': res,
        'test_y': list(test_y),
        'test_ids': list(test_ids),
        'model_name': self.model_name,
        'model_params': self.model_params,
        'label': self.label,
        'feature_columns_used': self.cols_to_use,
        'config': self.config,
        'feature_importance': self.get_feature_importance(clf, self.model_name),
        'columned_used_for_feat_importance': list(training_x.columns.values),
    }
    return result_dictionary, clf
评论列表
文章目录