def transform(self, X):
    """Compute the LLC representation of the provided data.

    Every local feature is coded over its ``self.neighbors`` nearest
    codewords, and the codes of all local features that belong to the same
    document are max-pooled into a single row.

    Parameters
    ----------
    X : array_like or list
        The local features to aggregate. They must be either nd arrays or
        a list of nd arrays. In case of a list each item is aggregated
        separately.

    Returns
    -------
    llc : ndarray of shape (n_documents, self.n_codewords)
        One max-pooled code per document. Pooling starts from zeros, so
        codewords that are never selected as a nearest neighbor stay at 0
        and negative coefficients are clipped to 0.
    """
    # Get the local features and the number of local features per document
    X, lengths = self._reshape_local_features(X)

    # Turn the per-document lengths into [start, end) row ranges of X.
    # The starts are derived from the ends instead of ``[0] + lengths``:
    # that expression only concatenates when ``lengths`` is a list and
    # would silently become an element-wise addition for an ndarray.
    ends = np.cumsum(lengths).astype(int)
    starts = np.concatenate(([0], ends[:-1])).astype(int)

    # Find the K nearest codewords of every local feature
    centroids = self._clusterer.cluster_centers_
    distances = pairwise_distances(X, centroids)
    K = self.neighbors
    # kth=K - 1 still leaves the K smallest distances in the first K
    # columns and, unlike kth=K, stays in bounds when K equals the number
    # of codewords.
    neighbors = np.argpartition(distances, K - 1)[:, :K]

    # Compute the llc representation
    llc = np.zeros((len(lengths), self.n_codewords))
    ridge = self.beta * np.eye(X.shape[1])
    for i, (s, e) in enumerate(zip(starts, ends)):
        for j in range(s, e):
            nearest = neighbors[j]
            C = centroids[nearest]
            # Regularised coding of x over its nearest codewords:
            #   a = C (C^T C + beta I)^-1 x
            # The linear system is solved directly rather than forming the
            # explicit inverse (same matrix, cheaper and better
            # conditioned). The coefficients are then rescaled to sum to
            # one before being max-pooled into the document row.
            a = C.dot(np.linalg.solve(C.T.dot(C) + ridge, X[j]))
            llc[i, nearest] = np.maximum(llc[i, nearest], a / a.sum())

    return llc
# Comment list
# Article table of contents