def getMedianDistanceBetweenSamples(self, sampleSet=None) :
"""
Jaakkola's heuristic method for setting the width parameter of the Gaussian
radial basis function kernel is to pick a quantile (usually the median) of
the distribution of Euclidean distances between points having different
labels.
Reference:
Jaakkola, M. Diekhaus, and D. Haussler. Using the Fisher kernel method to detect
remote protein homologies. In T. Lengauer, R. Schneider, P. Bork, D. Brutlad, J.
Glasgow, H.- W. Mewes, and R. Zimmer, editors, Proceedings of the Seventh
International Conference on Intelligent Systems for Molecular Biology.
"""
numrows = sampleSet.shape[0]
samples = sampleSet
G = sum((samples * samples), 1)
Q = numpy.tile(G[:, None], (1, numrows))
R = numpy.tile(G, (numrows, 1))
distances = Q + R - 2 * numpy.dot(samples, samples.T)
distances = distances - numpy.tril(distances)
distances = distances.reshape(numrows**2, 1, order="F").copy()
return numpy.sqrt(0.5 * numpy.median(distances[distances > 0]))
评论列表
文章目录