def fit(self, df_train, df_test):
    """
    Computes the drift between the two datasets.

    Every row of ``df_train`` is labelled 0 and every row of ``df_test``
    is labelled 1. ``self.estimator`` is then cross-validated on the
    concatenation of both datasets and the out-of-fold probability of
    class 1 (i.e. "this row comes from the test set") is stored.

    Parameters
    ----------
    df_train : pandas dataframe of shape = (n_train, p)
        The train set. It is left unmodified.

    df_test : pandas dataframe of shape = (n_test, p)
        The test set. It is left unmodified.

    Returns
    -------
    self : object
        Returns self.

    Notes
    -----
    A column named "target" present in the inputs is never used as a
    feature (it is excluded from the matrix given to the estimator),
    but it is no longer overwritten in, or removed from, the caller's
    dataframes.
    """
    n_train = len(df_train)
    n_test = len(df_test)

    # Build the train/test membership label directly from the row counts
    # instead of temporarily writing a "target" column into the caller's
    # dataframes: the inputs stay intact even if cross-validation raises,
    # and a pre-existing "target" column is not destroyed.
    self.__target = pd.Series([0] * n_train + [1] * n_test,
                              name="target",
                              dtype="int64")

    # Shuffled splitter; stratification keeps the train/test label ratio
    # in every fold.
    if self.stratify:
        self.__cv = StratifiedKFold(n_splits=self.n_folds,
                                    shuffle=True,
                                    random_state=self.random_state)
    else:
        self.__cv = KFold(n_splits=self.n_folds,
                          shuffle=True,
                          random_state=self.random_state)

    # Stack both datasets with a fresh 0..n-1 index so that rows line up
    # with self.__target. errors="ignore" because the inputs usually do
    # not carry a "target" column at all.
    X_tmp = pd.concat((df_train, df_test),
                      ignore_index=True).drop(columns=["target"],
                                              errors="ignore")

    # Column 1 of predict_proba is the probability of the label 1.
    self.__pred = cross_val_predict(estimator=self.estimator,
                                    X=X_tmp,
                                    y=self.__target,
                                    cv=self.__cv,
                                    method="predict_proba")[:, 1]

    self.__fitOK = True

    return self
评论列表
文章目录