def transform(self, X, **transform_params):
    """Drop the rows of ``X`` that an isolation forest labels as outliers.

    A new ``IsolationForest`` is built from ``self.contamination``,
    ``self.n_estimators`` and ``self.n_jobs``, stored on
    ``self.isolation_forest``, fitted on the analysed columns and then used
    to label those same rows. Only rows labelled ``1`` are kept.

    Args:
        X: Input table. It must expose ``shape``; the row filtering and the
            final ``pd.concat`` assume the helper methods return pandas
            objects derived from it (assumption -- the helpers are not
            visible here).
        **transform_params: Accepted for interface compatibility; unused.

    Returns:
        ``X`` itself, unchanged, when it has fewer than
        ``1 / self.contamination`` rows. Otherwise the column-wise
        concatenation of the analysed columns followed by the remaining
        columns, restricted to the rows labelled as inliers. Note that the
        column order of the result may therefore differ from ``X``.
    """
    # Too few rows for the configured contamination ratio: pass the input
    # through untouched instead of fitting a forest.
    if X.shape[0] < 1 / self.contamination:
        return X

    self.isolation_forest = IsolationForest(
        contamination=self.contamination,
        n_estimators=self.n_estimators,
        n_jobs=self.n_jobs,
    )

    # Columns to analyse; when the helper returns None, fall back to
    # whatever _numeric_columns selects.
    to_analyze = self._columns_to_apply(X)
    if to_analyze is None:
        to_analyze = self._numeric_columns(X)
    rest = self._rest_columns(X, to_analyze)

    self.isolation_forest.fit(to_analyze)
    labels = self.isolation_forest.predict(to_analyze)

    # Filter both frames with a single positional boolean mask. This avoids
    # writing a temporary '_outlier' column into frames that may be X or
    # views of X (which mutates caller data and would clobber a genuine
    # column of that name).
    inlier_mask = labels == 1
    to_analyze = to_analyze[inlier_mask]
    rest = rest[inlier_mask]

    if self.verbose:
        print('%s Now has %s' % (self.class_name, to_analyze.shape[0]))
    return pd.concat((to_analyze, rest), axis=1)
评论列表
文章目录