stacking.py 文件源码-python代码片段

def fit(self, X, y=None, **fit_params):
        """Fit the base models and the meta-model on out-of-fold predictions.

        For every fold yielded by ``self.cv.split(X, y)``, each model in
        ``self.models`` is trained on the fold's training rows and its
        predictions for the fold's test rows are written into
        ``self.meta_features_``. ``self.meta_model`` is then fit on those
        meta-features (with ``X`` appended when ``self.use_base_features`` is
        true), and finally every base model is refit on all of ``X`` and ``y``
        so that it can be used for later predictions.

        Parameters
        ----------
        X : array-like
            Training features. Passed through ``check_X_y`` together with
            ``y`` before use.
        y : array-like
            Training targets. Although the signature defaults to ``None``,
            ``y`` is always handed to ``check_X_y`` and to every ``fit`` call.
        **fit_params
            Keyword arguments keyed by model name (the keys of
            ``self.models``). Each value is a dict of keyword arguments that
            is forwarded unchanged to that model's ``fit``, both for the
            per-fold fits and for the final fit on the full data. Models
            without an entry are fit with no extra arguments.

        Returns
        -------
        self
            The fitted instance.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)

        n_rows = len(X)
        n_models = len(self.models)

        # meta_features_ has one row per sample. Without probabilities there
        # is one column per model; with probabilities each model contributes
        # (number of distinct values in y - 1) columns.
        # NOTE(review): np.empty leaves rows uninitialised unless every row
        # appears in some test fold of self.cv -- confirm the splitter used
        # partitions the data.
        if self.use_proba:
            self.n_probas_ = len(np.unique(y)) - 1
            self.meta_features_ = np.empty((n_rows, n_models * self.n_probas_))
        else:
            self.meta_features_ = np.empty((n_rows, n_models))

        # Generate the out-of-fold predictions
        for train_index, test_index in self.cv.split(X, y):
            # Slice once per fold instead of once per model
            X_train, y_train = X[train_index], y[train_index]
            X_test = X[test_index]

            for i, (name, model) in enumerate(self.models.items()):
                # Train the model on the training set with its own fit params
                model.fit(X_train, y_train, **fit_params.get(name, {}))

                if self.use_proba:
                    # Store the first n_probas_ probability columns of this
                    # model in its dedicated slice of meta_features_
                    probabilities = model.predict_proba(X_test)
                    offset = self.n_probas_ * i
                    for j in range(self.n_probas_):
                        self.meta_features_[test_index, offset + j] = probabilities[:, j]
                else:
                    self.meta_features_[test_index, i] = model.predict(X_test)

        # Combine the predictions with the original features
        if self.use_base_features:
            self.meta_features_ = np.hstack((self.meta_features_, X))

        self.meta_model.fit(self.meta_features_, y)

        # Each model has to be fit on all the data for further predictions.
        # Forward the same per-model fit params as in the folds above so the
        # final models are trained the same way as the ones that produced the
        # meta-features.
        for name, model in self.models.items():
            model.fit(X, y, **fit_params.get(name, {}))

        return self