def initialize_dictionary(self, X, max_iter=100, redo=5, n_samples=50000, normalize=False):
    """
    Samples feature vectors from X and learns an initial dictionary via k-means.

    The learned cluster centroids are written into the shared variable self.V.

    :param X: list of 2-D arrays; X[i] has shape (n_vectors_i, n_features)
    :param max_iter: maximum number of k-means iterations
    :param redo: number of k-means restarts (passed as n_init)
    :param n_samples: total number of feature vectors to sample across all objects
    :param normalize: if True, apply l_2 normalization to the sampled feature vectors
    """
    # Sample only a small number of feature vectors from each object
    samples_per_object = int(np.ceil(n_samples / len(X)))
    print("Sampling feature vectors...")
    # Collect the per-object samples in a list and stack them once at the end:
    # calling np.vstack inside the loop re-copies the accumulated array on every
    # iteration, which is quadratic in the total number of sampled vectors.
    sampled = []
    for x in X:
        # NOTE(review): the original slice kept samples_per_object + 1 rows per
        # object; preserved as-is — confirm whether the +1 is intentional
        # (possibly compensating for integer division under Python 2).
        idx = np.random.permutation(x.shape[0])[:samples_per_object + 1]
        sampled.append(x[idx, :])
    features = np.vstack(sampled)
    print("Clustering feature vectors...")
    features = np.float64(features)
    if normalize:
        features = feature_normalizer(features)
    # cluster.k_means returns (centroids, labels, inertia); only the centroids
    # are used to initialize the dictionary.
    V = cluster.k_means(features, n_clusters=self.Nk, max_iter=max_iter, n_init=redo)
    self.V.set_value(np.asarray(V[0], dtype=theano.config.floatX))