def test_labels_assignment_and_inertia():
# pure numpy implementation as easily auditable reference gold
# implementation
rng = np.random.RandomState(42)
noisy_centers = centers + rng.normal(size=centers.shape)
labels_gold = - np.ones(n_samples, dtype=np.int)
mindist = np.empty(n_samples)
mindist.fill(np.infty)
for center_id in range(n_clusters):
dist = np.sum((X - noisy_centers[center_id]) ** 2, axis=1)
labels_gold[dist < mindist] = center_id
mindist = np.minimum(dist, mindist)
inertia_gold = mindist.sum()
assert_true((mindist >= 0.0).all())
assert_true((labels_gold != -1).all())
# perform label assignment using the dense array input
x_squared_norms = (X ** 2).sum(axis=1)
labels_array, inertia_array = _labels_inertia(
X, x_squared_norms, noisy_centers)
assert_array_almost_equal(inertia_array, inertia_gold)
assert_array_equal(labels_array, labels_gold)
# perform label assignment using the sparse CSR input
x_squared_norms_from_csr = row_norms(X_csr, squared=True)
labels_csr, inertia_csr = _labels_inertia(
X_csr, x_squared_norms_from_csr, noisy_centers)
assert_array_almost_equal(inertia_csr, inertia_gold)
assert_array_equal(labels_csr, labels_gold)
评论列表
文章目录