def binHisto(data, verbose=False):
    """
    Calculates bin width and number of bins for a histogram using the
    Freedman-Diaconis rule; if the rule fails, defaults to the square-root
    method.

    The Freedman-Diaconis method is detailed in:
        Freedman, D., and P. Diaconis (1981), On the histogram as a density
        estimator: L2 theory, Z. Wahrscheinlichkeitstheor. Verw. Geb., 57,
        453-476

    and is also described by:
        Wilks, D. S. (2006), Statistical Methods in the Atmospheric Sciences,
        2nd ed.

    The F-D bin width is ``2 * IQR / n**(1/3)`` and the bin count is the data
    range divided by that width, rounded to the nearest integer. The
    square-root fallback (``nbins = round(sqrt(n))``) is used when the F-D
    width is zero, when the F-D bin count rounds to zero, or when the F-D
    bin count is not finite (e.g. the data contain NaN or inf).

    Parameters
    ==========
    data : array_like
        list/array of data values (one-dimensional, at least one element)
    verbose : boolean (optional)
        print out which rule was used

    Returns
    =======
    out : tuple
        (bin width, number of bins). The number of bins is always a Python
        ``int``. When the square-root fallback is used the returned width is
        ``len(data) / nbins`` (see the review note in the code).

    Raises
    ======
    ValueError
        if ``data`` is empty

    Examples
    ========
    >>> import numpy, spacepy
    >>> import matplotlib.pyplot as plt
    >>> numpy.random.seed(8675301)
    >>> data = numpy.random.randn(1000)
    >>> binw, nbins = spacepy.toolbox.binHisto(data)
    >>> print(nbins)
    19
    >>> p = plt.hist(data, bins=nbins, histtype='step', density=True)

    See Also
    ========
    matplotlib.pyplot.hist
    """
    # Function-scope import: matplotlib.mlab.prctile (used previously) no
    # longer exists in current matplotlib, and numpy provides the same
    # quartile computation.
    import numpy as np

    npts = len(data)
    if npts == 0:
        raise ValueError("binHisto requires at least one data value")
    values = np.asarray(data)

    # Inter-quartile range drives the Freedman-Diaconis bin width.
    ql, qu = np.percentile(values, (25, 75))
    iqr = qu - ql
    binw = 2. * iqr / (npts ** (1. / 3.))

    # nbins == 0 is the sentinel for "F-D rule unusable".
    nbins = 0
    if binw != 0:
        ratio = (values.max() - values.min()) / binw
        # Check finiteness BEFORE rounding: rounding NaN/inf to an integer
        # raises on Python 3, which would make the fallback unreachable.
        if np.isfinite(ratio):
            nbins = int(round(float(ratio)))

    if nbins == 0:
        # F-D rule failed (zero width, zero bins, NaN or inf): sqrt(n) rule.
        nbins = int(round(np.sqrt(npts)))
        # NOTE(review): this width is len(data)/nbins, not a width in data
        # units; kept unchanged for backward compatibility -- confirm intent.
        binw = float(npts) / nbins
        if verbose:
            print("Used sqrt rule")
    elif verbose:
        print("Used F-D rule")
    return (binw, nbins)
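# Comment list  (web-page navigation text left over from extraction; not code)
# Table of contents  (web-page navigation text left over from extraction; not code)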