def loss_augmented_fit(self, X, y, loss_mult):
    """Fit K-Medoids to the provided data with a loss-augmented score.

    Medoids are chosen greedily, one per iteration, until
    ``self.n_clusters`` of them have been selected.  In each iteration
    every remaining candidate index ``i`` is scored as::

        sum(min(D[:, subset + [i]], axis=1)) - loss_mult * nmi

    where ``D`` is the matrix returned by ``self.distance_func(X)`` and
    ``nmi`` is ``normalized_mutual_info_score`` between ``y`` and the
    cluster assignment induced by the trial medoid set.  The candidate
    with the smallest score is added to the medoid set.

    Parameters
    ----------
    X : array-like or sparse matrix, shape=(n_samples, n_features)
    y : array-like
        Labels compared against each trial clustering via
        ``normalized_mutual_info_score`` (presumably one label per
        sample -- confirm against callers).
    loss_mult : scalar
        Multiplier applied to the mutual-information term before it is
        subtracted from the clustering cost.

    Returns
    -------
    self
        With ``labels_``, ``cluster_centers_`` and ``center_ics_`` set.
    """
    self._check_init_args()

    # Check that the array is good and attempt to convert it to
    # Numpy array if possible
    X = self._check_array(X)

    # Apply distance metric to get the distance matrix
    D = self.distance_func(X)

    num_data = X.shape[0]

    # This has to be a real list: selected medoids are removed from it
    # below, and a Python 3 ``range`` object does not support deletion.
    candidate_ids = list(range(num_data))
    subset = []

    while len(subset) < self.n_clusters:
        candidate_scores = []
        for i in candidate_ids:
            # Evaluate the medoid set with candidate i tentatively added
            # (appended last, leaving ``subset`` itself untouched).
            trial = subset + [i]
            marginal_cost = np.sum(np.min(D[:, trial], axis=1))
            loss = normalized_mutual_info_score(
                y, self._get_cluster_ics(D, trial))
            candidate_scores.append(marginal_cost - loss_mult * loss)

        # candidate_scores is aligned with candidate_ids, so the argmin
        # position identifies the winner and lets us drop it from the
        # candidate pool in one step.
        best_pos = int(np.argmin(candidate_scores))
        i_star = candidate_ids.pop(best_pos)

        # Keep the chosen medoid indices sorted.
        bisect.insort(subset, i_star)

    # Expose labels_ which are the assignments of
    # the training data to clusters
    self.labels_ = self._get_cluster_ics(D, subset)

    # Expose cluster centers, i.e. medoids
    self.cluster_centers_ = X.take(subset, axis=0)

    # Expose indices of chosen cluster centers
    self.center_ics_ = subset

    return self
评论列表
文章目录