def kde(data, N=None, MIN=None, MAX=None):
    """Estimate a one-dimensional density on a regular mesh.

    The samples are binned into a fine histogram, the histogram is taken to
    the cosine-transform domain, its coefficients are damped by a Gaussian
    factor whose width ``t_star`` is the root of ``fixed_point`` found by
    Brent's method, and the result is transformed back and normalised.

    Parameters
    ----------
    data : sequence of numbers
        The samples.  Must support ``len``, ``min`` and ``max``.
    N : int, optional
        Requested number of mesh points.  It is rounded up to the next power
        of two; when omitted, ``2**14`` points are used.
    MIN, MAX : number, optional
        Lower / upper limit of the mesh.  Whichever is ``None`` is placed one
        tenth of the data range beyond the smallest / largest sample.

    Returns
    -------
    tuple or None
        ``(bandwidth, mesh, density)`` where ``bandwidth`` is
        ``sqrt(t_star) * (MAX - MIN)``, ``mesh`` is the list of the ``N`` bin
        centres and ``density`` is the estimate at those centres, scaled so
        that its trapezoidal integral over ``mesh`` is 1.
        ``None`` is returned (after printing ``'Oops!'``) when the root
        search raises ``ValueError``; callers that unpack the result must
        check for this first.
    """
    # Parameters to set up the mesh on which to calculate.
    N = 2**14 if N is None else int(2**sci.ceil(sci.log2(N)))

    if MIN is None or MAX is None:
        minimum = min(data)
        maximum = max(data)
        # Float divisor: stays a true division for integer samples even on
        # Python 2 without ``from __future__ import division``.
        margin = (maximum - minimum) / 10.0
        if MIN is None:
            MIN = minimum - margin
        if MAX is None:
            MAX = maximum + margin

    # Range of the mesh.
    R = MAX - MIN

    # Histogram the data to get a crude first approximation of the density.
    M = len(data)
    DataHist, bins = sci.histogram(data, bins=N, range=(MIN, MAX))
    # Counts are integers; divide by a float so the relative frequencies are
    # not floored to zero under Python 2 integer division.
    DataHist = DataHist / float(M)
    DCTData = scipy.fftpack.dct(DataHist, norm=None)

    # Inputs for fixed_point: squared indices 1..N-1 and the squared, halved
    # DCT coefficients with the zeroth coefficient dropped.
    I = [k * k for k in range(1, N)]
    SqDCTData = (DCTData[1:] / 2.0)**2

    # The fixed point calculation finds the bandwidth = t_star, searched for
    # in the bracket [0, guess].
    guess = 0.1
    try:
        t_star = scipy.optimize.brentq(fixed_point, 0, guess,
                                       args=(M, I, SqDCTData))
    except ValueError:
        # brentq raises ValueError when fixed_point has the same sign at both
        # ends of the bracket.  The original best-effort behaviour is kept:
        # report and return None.  The parenthesised print is valid on both
        # Python 2 and Python 3.
        print('Oops!')
        return None

    # Smooth the DCT-transformed data using t_star.
    SmDCTData = DCTData * sci.exp(-sci.arange(N)**2 * sci.pi**2 * t_star / 2)

    # Inverse DCT to get the density on the mesh.
    density = scipy.fftpack.idct(SmDCTData, norm=None) * N / R

    # Mesh points are the centres of the N histogram bins.
    mesh = [(lo + hi) / 2 for lo, hi in zip(bins[:-1], bins[1:])]
    bandwidth = sci.sqrt(t_star) * R

    # Renormalise so the estimate integrates to one over the mesh.
    density = density / sci.trapz(density, mesh)
    return bandwidth, mesh, density
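# --- Web page residue captured with the listing (not part of the code) ---
# kde.py source file
# python
# Views: 32
# Favorites: 0
# Likes: 0
# Comments: 0
# Comment list
# Table of contents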