def generalized_esd(x, r, alpha=0.05, method='mean'):
    """Detect outliers with the generalized ESD test.

    Reference:
    http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h3.htm

    Up to ``r`` candidate points are removed one at a time; at every step
    the point with the largest absolute deviation from the current centre
    is compared against the critical value for that step. Following the
    referenced procedure, the number of outliers is the LARGEST step whose
    statistic exceeds its critical value, so an early non-significant step
    does not stop the search (this is what protects against masking).

    Args:
        x (array-like): the data. It is copied into a float64 array, so the
            caller's object is never modified. NaN entries are skipped when
            picking the most deviating point.
        r (int): max number of outliers
        alpha (float): the significance level
        method (str): 'median' selects ``__get_pd_median``; any other value
            selects ``__get_pd_mean``.

    Returns:
        list[int]: indices into ``x`` of the detected outliers, in the order
        in which they were removed.
    """
    # Work on a private copy: candidates are blanked out with NaN below, and
    # np.asarray would alias (and thus corrupt) a caller-owned float64 array.
    x = np.array(x, dtype=np.float64)
    # The helper returns a pair (m, e); presumably a location and a dispersion
    # estimate of the non-NaN data -- confirm against the helper definitions.
    fn = __get_pd_median if method == 'median' else __get_pd_mean
    # NOTE(review): N counts every entry, including NaNs already present in
    # the input -- confirm this is the sample size the critical value expects.
    N = len(x)

    candidates = []   # index removed at each step, in removal order
    n_outliers = 0    # largest step i whose statistic exceeded its critical value
    for i in range(1, r + 1):
        if np.all(np.isnan(x)):
            # Nothing left to test (also covers empty input).
            break
        m, e = fn(x)
        if e == 0.:
            # Zero dispersion: the statistic is undefined, so stop here.
            break
        y = np.abs(x - m)
        j = int(np.nanargmax(y))
        candidates.append(j)
        # Equivalent to R_i = y[j] / e > lambda_i, without the division.
        if y[j] > __get_lambda_critical(N, i, alpha) * e:
            n_outliers = i
        # Remove the candidate whether or not it was significant, so that the
        # next step evaluates the reduced sample.
        x[j] = np.nan
    return candidates[:n_outliers]
# Comment list
# Table of contents