def _compute_mi_cc(x, y, n_neighbors):
"""Compute mutual information between two continuous variables.
Parameters
----------
x, y : ndarray, shape (n_samples,)
Samples of two continuous random variables, must have an identical
shape.
n_neighbors : int
Number of nearest neighbors to search for each point, see [1]_.
Returns
-------
mi : float
Estimated mutual information. If it turned out to be negative it is
replace by 0.
Notes
-----
True mutual information can't be negative. If its estimate by a numerical
method is negative, it means (providing the method is adequate) that the
mutual information is close to 0 and replacing it by 0 is a reasonable
strategy.
References
----------
.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
"""
n_samples = x.size
x = x.reshape((-1, 1))
y = y.reshape((-1, 1))
xy = np.hstack((x, y))
# Here we rely on NearestNeighbors to select the fastest algorithm.
nn = NearestNeighbors(metric='chebyshev', n_neighbors=n_neighbors)
nn.fit(xy)
radius = nn.kneighbors()[0]
radius = np.nextafter(radius[:, -1], 0)
# Algorithm is selected explicitly to allow passing an array as radius
# later (not all algorithms support this).
nn.set_params(algorithm='kd_tree')
nn.fit(x)
ind = nn.radius_neighbors(radius=radius, return_distance=False)
nx = np.array([i.size for i in ind])
nn.fit(y)
ind = nn.radius_neighbors(radius=radius, return_distance=False)
ny = np.array([i.size for i in ind])
mi = (digamma(n_samples) + digamma(n_neighbors) -
np.mean(digamma(nx + 1)) - np.mean(digamma(ny + 1)))
return max(0, mi)
评论列表
文章目录