def encode(self, data, metric='euclidean'):
    """Encode ``data`` with the codebook using a nearest-neighbor rule.

    The prototypes in ``self.protos`` are embedded on a single dimension
    with MDS and reordered by that 1-D coordinate. Each sample is then
    matched to its single nearest prototype in the reordered codebook.
    The symbols and the encoding are also stored on the instance.

    Parameters
    ----------
    data : real array-like, shape(n_samples, n_features)
        Data matrix, each row represents a sample.
    metric : string
        Distance used for the nearest-neighbor search; one of the options
        defined for
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.pairwise_distances.html.
        Valid options include:

        - euclidean
        - cityblock
        - l1
        - cosine

    Returns
    -------
    encoded_data : real array-like
        ``data``, as represented by the prototypes in the codebook.
        NOTE(review): the symbols array is 2-D with a single column, so
        this lookup presumably has shape (n_samples, 1, n_features) rather
        than (n_samples, n_features) -- confirm what callers expect.
    ts_symbols : array, shape(n_samples, 1)
        A discrete symbolic time series: for every sample, the index of
        its nearest prototype in the MDS-sorted codebook (not in the
        original order of ``self.protos``).
    """
    # Data mining procedure as described in [Laskaris2004]: order the
    # prototypes along a one-dimensional MDS embedding so that symbol
    # indices follow that ordering.
    embedder = MDS(1, random_state=self.rng)
    coords_1d = embedder.fit_transform(self.protos)
    order = np.argsort(coords_1d.ravel())
    ordered_protos = self.protos[order]

    # Single nearest prototype per sample, searched in the sorted codebook.
    searcher = NearestNeighbors(n_neighbors=1, algorithm='auto', metric=metric)
    searcher = searcher.fit(ordered_protos)
    neighbors = searcher.kneighbors(data)

    # kneighbors yields (distances, indices); only the indices are kept.
    self.__symbols = neighbors[1]
    self.__encoding = ordered_protos[self.__symbols]
    return (self.__encoding, self.__symbols)
# (Web-page residue, not part of the code: "Comment list" / "Article table of contents".)