# Note: assumes the enclosing module imports GradientBoostingClassifier from
# sklearn.ensemble and the project's `utils` helper module.
def score(self, estimator, X, y, advanced_scoring=False):
    X, y = utils.drop_missing_y_vals(X, y, output_column=None)

    # Convert sparse feature matrices to a dense array before handing them
    # to GradientBoostingClassifier
    if isinstance(estimator, GradientBoostingClassifier):
        X = X.toarray()

    predictions = estimator.predict_proba(X)

    if self.scoring_method == 'brier_score_loss':
        # At the moment, Microsoft's LightGBM can return probabilities > 1 or
        # < 0, which can break some scoring functions, so we clip each
        # positive-class probability into the [0, 1] range.
        probas = [max(min(row[1], 1), 0) for row in predictions]
        predictions = probas

    try:
        score = self.scoring_func(y, predictions)
    except ValueError:
        # bad_vals_as_strings is assumed to be a module-level set of the
        # string forms of null/infinite values (e.g. 'nan', 'inf', '-inf',
        # 'None', '')
        bad_val_indices = set()
        for idx, val in enumerate(y):
            if str(val) in bad_vals_as_strings:
                bad_val_indices.add(idx)

        predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
        y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

        print('Found ' + str(len(bad_val_indices)) + ' null or infinite values in the y values. We will ignore these, and report the score on the rest of the dataset')

        try:
            score = self.scoring_func(y, predictions)
        except ValueError:
            # A badly fit model (too little data, or a bad set of
            # hyperparameters during a grid search) can predict probabilities
            # > 1 or < 0. Cap those here and warn the user; they are unlikely
            # to occur in a model trained properly on enough data with
            # reasonable params.
            predictions = self.clean_probas(predictions)
            score = self.scoring_func(y, predictions)

    if advanced_scoring:
        return (-1 * score, predictions)
    else:
        return -1 * score
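

# clean_probas() is called above but not shown in this excerpt. Below is a
# minimal sketch of what it plausibly does, given the comment above: clip
# every probability into [0, 1] and renormalize any multi-class row whose
# clipped values sum to more than 1. This helper is an assumption for
# illustration, not the library's actual implementation.
def clean_probas(self, probas):
    # Assumes probas is either a flat list of floats or an iterable of
    # per-class rows (lists or numpy rows)
    print('Warning: some predicted probabilities fell outside [0, 1]. Capping them now.')
    if len(probas) > 0 and isinstance(probas[0], float):
        # A flat list of positive-class probabilities: just clip each value
        return [max(min(p, 1), 0) for p in probas]
    cleaned = []
    for row in probas:
        clipped = [max(min(p, 1), 0) for p in row]
        total = sum(clipped)
        if total > 1:
            # Renormalize so each row is a valid probability distribution
            clipped = [p / total for p in clipped]
        cleaned.append(clipped)
    return cleaned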
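

# Standalone illustration of why score() negates its result: metrics such as
# sklearn's brier_score_loss are lower-is-better, so returning -1 * score lets
# callers uniformly maximize. (Example values are hypothetical.)
if __name__ == '__main__':
    from sklearn.metrics import brier_score_loss

    loss = brier_score_loss([0, 1, 1, 0], [0.1, 0.9, 0.8, 0.3])
    print(loss)       # 0.0375 -- smaller is better
    print(-1 * loss)  # -0.0375 -- after negation, larger is better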