def outlier_prediction(x_train, y_train):
    """Drop training samples that an isolation forest flags as outliers.

    An ``IsolationForest`` (``max_samples=100``, seeded with
    ``np.random.RandomState(42)``) is fitted on all features of ``x_train``
    and then used to label those same samples. The prediction is 1 for an
    inlier and -1 for an outlier; only the rows labelled 1 are kept.

    The alternative noted by the original author -- thresholding the squared
    residuals of predicted vs. actual values -- is not implemented here.

    Args:
        x_train: Feature matrix, one row per sample. Assumed to be a NumPy
            array (or another container supporting boolean-mask row
            selection) -- TODO confirm against callers.
        y_train: Targets aligned row-for-row with ``x_train``; same
            indexing assumption as ``x_train``.

    Returns:
        A tuple ``(x_train_modified, y_train_modified)`` holding only the
        samples predicted to be inliers.
    """
    rng = np.random.RandomState(42)
    clf_all_features = IsolationForest(max_samples=100, random_state=rng)
    clf_all_features.fit(x_train)

    # Label every training sample using all features at once.
    y_pred_train = clf_all_features.predict(x_train)

    # Build the keep-mask with a vectorised comparison. Wrapping
    # ``map(...)`` in ``np.array`` only works on Python 2; on Python 3
    # ``map`` is a lazy iterator and NumPy turns it into a 0-d object
    # array that cannot be used as a row mask.
    inlier_mask = np.asarray(y_pred_train) == 1

    # Exclude suggested outliers to improve downstream prediction score.
    x_train_modified = x_train[inlier_mask]
    y_train_modified = y_train[inlier_mask]
    return x_train_modified, y_train_modified
评论列表
文章目录