def variable_batch_size_comparison(data):
batch_sizes = [i.astype(int) for i in np.linspace(data.shape[0] // 10,
data.shape[0], num=10)]
for n_components in [i.astype(int) for i in
np.linspace(data.shape[1] // 10,
data.shape[1], num=4)]:
all_times = defaultdict(list)
all_errors = defaultdict(list)
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('rpca', rpca)]}
# Create flat baselines to compare the variation over batch size
all_times['pca'].extend([results_dict['pca']['time']] *
len(batch_sizes))
all_errors['pca'].extend([results_dict['pca']['error']] *
len(batch_sizes))
all_times['rpca'].extend([results_dict['rpca']['time']] *
len(batch_sizes))
all_errors['rpca'].extend([results_dict['rpca']['error']] *
len(batch_sizes))
for batch_size in batch_sizes:
ipca = IncrementalPCA(n_components=n_components,
batch_size=batch_size)
results_dict = {k: benchmark(est, data) for k, est in [('ipca',
ipca)]}
all_times['ipca'].append(results_dict['ipca']['time'])
all_errors['ipca'].append(results_dict['ipca']['error'])
plot_batch_times(all_times, n_components, batch_sizes, data)
# RandomizedPCA error is always worse (approx 100x) than other PCA
# tests
plot_batch_errors(all_errors, n_components, batch_sizes, data)
bench_plot_incremental_pca.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录