# Note: assumes the enclosing module imports GradientBoostingClassifier from
# sklearn.ensemble and the project's `utils` helper module.
def score(self, estimator, X, y, advanced_scoring=False):
    X, y = utils.drop_missing_y_vals(X, y, output_column=None)

    # Convert sparse feature matrices to a dense array before handing them
    # to GradientBoostingClassifier
    if isinstance(estimator, GradientBoostingClassifier):
        X = X.toarray()

    predictions = estimator.predict_proba(X)

    if self.scoring_method == 'brier_score_loss':
        # At the moment, Microsoft's LightGBM can return probabilities > 1 or
        # < 0, which can break some scoring functions, so we clip each
        # positive-class probability into the [0, 1] range.
        probas = [max(min(row[1], 1), 0) for row in predictions]
        predictions = probas

    try:
        score = self.scoring_func(y, predictions)
    except ValueError:
        # bad_vals_as_strings is assumed to be a module-level set of the
        # string forms of null/infinite values (e.g. 'nan', 'inf', '-inf',
        # 'None', '')
        bad_val_indices = set()
        for idx, val in enumerate(y):
            if str(val) in bad_vals_as_strings:
                bad_val_indices.add(idx)

        predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
        y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

        print('Found ' + str(len(bad_val_indices)) + ' null or infinite values in the y values. We will ignore these, and report the score on the rest of the dataset')

        try:
            score = self.scoring_func(y, predictions)
        except ValueError:
            # A badly fit model (too little data, or a bad set of
            # hyperparameters during a grid search) can predict probabilities
            # > 1 or < 0. Cap those here and warn the user; they are unlikely
            # to occur in a model trained properly on enough data with
            # reasonable params.
            predictions = self.clean_probas(predictions)
            score = self.scoring_func(y, predictions)

    if advanced_scoring:
        return (-1 * score, predictions)
    else:
        return -1 * score
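

# clean_probas() is called above but not shown in this excerpt. Below is a
# minimal sketch of what it plausibly does, given the comment above: clip
# every probability into [0, 1] and renormalize any multi-class row whose
# clipped values sum to more than 1. This helper is an assumption for
# illustration, not the library's actual implementation.
def clean_probas(self, probas):
    # Assumes probas is either a flat list of floats or an iterable of
    # per-class rows (lists or numpy rows)
    print('Warning: some predicted probabilities fell outside [0, 1]. Capping them now.')
    if len(probas) > 0 and isinstance(probas[0], float):
        # A flat list of positive-class probabilities: just clip each value
        return [max(min(p, 1), 0) for p in probas]
    cleaned = []
    for row in probas:
        clipped = [max(min(p, 1), 0) for p in row]
        total = sum(clipped)
        if total > 1:
            # Renormalize so each row is a valid probability distribution
            clipped = [p / total for p in clipped]
        cleaned.append(clipped)
    return cleaned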
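

# Standalone illustration of why score() negates its result: metrics such as
# sklearn's brier_score_loss are lower-is-better, so returning -1 * score lets
# callers uniformly maximize. (Example values are hypothetical.)
if __name__ == '__main__':
    from sklearn.metrics import brier_score_loss

    loss = brier_score_loss([0, 1, 1, 0], [0.1, 0.9, 0.8, 0.3])
    print(loss)       # 0.0375 -- smaller is better
    print(-1 * loss)  # -0.0375 -- after negation, larger is better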