def test_random_projection_embedding_quality():
data, _ = make_sparse_random_data(8, 5000, 15000)
eps = 0.2
original_distances = euclidean_distances(data, squared=True)
original_distances = original_distances.ravel()
non_identical = original_distances != 0.0
# remove 0 distances to avoid division by 0
original_distances = original_distances[non_identical]
for RandomProjection in all_RandomProjection:
rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
projected = rp.fit_transform(data)
projected_distances = euclidean_distances(projected, squared=True)
projected_distances = projected_distances.ravel()
# remove 0 distances to avoid division by 0
projected_distances = projected_distances[non_identical]
distances_ratio = projected_distances / original_distances
# check that the automatically tuned values for the density respect the
# contract for eps: pairwise distances are preserved according to the
# Johnson-Lindenstrauss lemma
assert_less(distances_ratio.max(), 1 + eps)
assert_less(1 - eps, distances_ratio.min())
评论列表
文章目录