def fit(self, X, y=None, **fit_params):
    """Fit every base model out-of-fold, then fit the meta-model.

    For each fold produced by ``self.cv.split(X, y)`` every model in
    ``self.models`` is trained on the fold's training rows and used to
    predict the fold's test rows. Those predictions are stored in
    ``self.meta_features_``, on which ``self.meta_model`` is then fitted.
    Finally every base model is refitted on all of ``X`` and ``y``.

    Parameters
    ----------
    X
        Training features; validated together with ``y`` by ``check_X_y``.
    y
        Training targets.
    **fit_params
        Keyword arguments keyed by model name (the keys of
        ``self.models``). Each value is a dict of keyword arguments that
        is forwarded to that model's ``fit``, both during the
        cross-validation fits and during the final refit on all the data.

    Returns
    -------
    self

    Notes
    -----
    Sets ``self.meta_features_`` and, when ``self.use_proba`` is true,
    ``self.n_probas_``.
    """
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)

    n_samples = len(X)
    n_models = len(self.models)

    # meta_features_ has one row per sample and one column per model.
    # With use_proba, each model instead contributes (n_classes - 1)
    # columns: only the first n_probas_ columns of predict_proba are kept
    # (presumably because the remaining one is redundant -- TODO confirm).
    if self.use_proba:
        self.n_probas_ = len(np.unique(y)) - 1
        n_columns = n_models * self.n_probas_
    else:
        n_columns = n_models

    # np.empty does not initialise the array: rows are only written when
    # they appear in a test fold below.
    # NOTE(review): assumes the test folds of self.cv together cover every
    # row of X -- confirm for the splitter in use.
    self.meta_features_ = np.empty((n_samples, n_columns))

    for train_index, test_index in self.cv.split(X, y):
        for i, (name, model) in enumerate(self.models.items()):
            # Per-model keyword arguments; models without an entry get none
            model_fit_params = fit_params.get(name, {})

            # Train on the fold's training rows only, so that the stored
            # predictions for the test rows are out-of-fold
            model.fit(X[train_index], y[train_index], **model_fit_params)

            if self.use_proba:
                probabilities = model.predict_proba(X[test_index])
                # Model i owns columns [offset, offset + n_probas_)
                offset = self.n_probas_ * i
                for j in range(self.n_probas_):
                    self.meta_features_[test_index, offset + j] = probabilities[:, j]
            else:
                self.meta_features_[test_index, i] = model.predict(X[test_index])

    # Combine the predictions with the original features
    if self.use_base_features:
        self.meta_features_ = np.hstack((self.meta_features_, X))

    self.meta_model.fit(self.meta_features_, y)

    # Each model has to be fit on all the data for further predictions.
    # The same per-model fit parameters as in the cross-validation fits are
    # forwarded so the final models are trained consistently with the ones
    # that produced the meta-features.
    for name, model in self.models.items():
        model.fit(X, y, **fit_params.get(name, {}))

    return self
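# Comment list (appears to be stray page-navigation text, not part of the code)
# Table of contents (appears to be stray page-navigation text, not part of the code)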