def compute_percentile_from_distribution(counter, percentile):
    """Compute a single percentile from a value -> frequency mapping.

    Uses Type 7 linear interpolation from:
    Hyndman, R.J.; Fan, Y. (1996). "Sample Quantiles in Statistical Packages".

    The distribution is treated as if every value were repeated ``freq``
    times in a sorted sample; the percentile is interpolated between the
    order statistics at ranks ``floor(h)`` and ``ceil(h)`` (0-based), where
    ``h = (n - 1) * percentile / 100`` and ``n`` is the total frequency.

    Args:
        counter: A ``collections.Counter`` or any ``{value: frequency}``
            mapping. Values must be sortable and support subtraction.
        percentile: The percentile to compute, in the range [0, 100].

    Returns:
        The interpolated percentile value.

    Raises:
        ValueError: If ``percentile`` is outside [0, 100] or the
            distribution holds no observations.
    """
    # Explicit raise instead of ``assert`` so validation survives ``python -O``.
    if not 0 <= percentile <= 100:
        raise ValueError("percentile must be in the range [0, 100]")

    # Built-in sum(): np.sum() on a Python 3 dict view does not add up the
    # counts (it hands back the view object), which breaks ``n - 1`` below.
    n = sum(counter.values())
    if n <= 0:
        raise ValueError("counter must contain at least one observation")

    # Fractional 0-based rank of the requested percentile in the sorted sample.
    h = (n - 1) * (percentile / 100.0)
    lower_rank = np.floor(h)
    upper_rank = np.ceil(h)
    fraction = h - lower_rank

    lower_value = None
    cum_sum = 0
    for value, freq in sorted(counter.items()):
        cum_sum += freq
        # First value whose cumulative count passes floor(h) is the lower
        # order statistic; remember it only once.
        if lower_value is None and cum_sum > lower_rank:
            lower_value = value
        # First value whose cumulative count passes ceil(h) is the upper
        # order statistic; interpolate between the two and finish.
        if cum_sum > upper_rank:
            return lower_value + fraction * (value - lower_value)
# Test for compute_percentile_from_distribution()
#def test_percentile(x, p):
# c = Counter()
# for xi in x:
# c[xi] += 1
# my_res = np.array([compute_percentile_from_distribution(c, p_i) for p_i in p], dtype=float)
# numpy_res = np.percentile(x, p)
#     print(np.sum(np.abs(numpy_res - my_res)))
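# Comment list      (web-page navigation label left over from extraction; not part of the code)
# Article contents  (web-page navigation label left over from extraction; not part of the code)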