def sigma_gak(dataset, n_samples=100, random_state=None):
    """Suggest a bandwidth for the Global Alignment kernel (GAK).

    The heuristic, introduced in [1]_, draws individual time points from
    the dataset, takes the median of their pairwise Euclidean distances and
    scales it by the square root of the series length.

    Parameters
    ----------
    dataset
        A dataset of time series. It is converted with
        ``to_time_series_dataset`` and the result is read as a 3-D array of
        shape ``(n_ts, sz, d)``.
    n_samples : int (default: 100)
        Number of time points drawn to estimate the median distance.
    random_state : integer or numpy.RandomState or None (default: None)
        Source of randomness used to draw the samples; passed through
        ``check_random_state``. An integer fixes the seed, ``None`` falls
        back to the global numpy random number generator.

    Returns
    -------
    float
        Suggested bandwidth (:math:`\\sigma`) for the Global Alignment kernel

    Examples
    --------
    >>> dataset = [[1, 2, 2, 3], [1., 2., 3., 4.]]
    >>> sigma_gak(dataset=dataset, n_samples=200, random_state=0)  # doctest: +ELLIPSIS
    2.0...

    See Also
    --------
    gak : Compute Global Alignment kernel
    cdist_gak : Compute cross-similarity matrix using Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """
    rng = check_random_state(random_state)
    series = to_time_series_dataset(dataset)
    n_series, length, dim = series.shape

    # Every (series, timestamp) pair is one candidate point of dimension `dim`.
    n_points = n_series * length
    points = series.reshape((-1, dim))

    # Sample with replacement only when there are fewer points than requested.
    with_replacement = n_points < n_samples
    picked = rng.choice(n_points, size=n_samples, replace=with_replacement)

    pairwise = pdist(points[picked], metric="euclidean")
    return numpy.median(pairwise) * numpy.sqrt(length)
评论列表
文章目录