def compute_reduced_representation(self):
    """Fit a PCA over all stored document vectors and save the projection.

    The score database path is read from the "score" section of the
    configuration. Every child group of ``h5[self.method]`` contributes
    its "V" matrix; the matrices are stacked, an ``IncrementalPCA`` with
    ``self.reduced_n_components`` components is fitted on the stack, and
    three datasets are written back into each child group:

    * ``VX`` -- the rows of the projected matrix that belong to that
      group (row count taken from the group's "_ref" dataset),
    * ``VX_explained_variance_ratio_`` -- the fitted model's explained
      variance ratio,
    * ``VX_components_`` -- the fitted model's components.

    Returns:
        None. Nothing is read or written when ``self.compute_reduced``
        is falsy, or when the method group has no child groups.
    """
    if not self.compute_reduced:
        return None

    config_score = simple_config.load()["score"]
    f_db = os.path.join(
        config_score["output_data_directory"],
        config_score["document_scores"]["f_db"],
    )

    h5 = touch_h5(f_db)
    try:
        g = h5[self.method]

        # Snapshot the keys so the three passes below see the same order.
        keys = list(g.keys())
        if not keys:
            # Nothing to reduce; np.vstack would raise on an empty list.
            return None

        V = np.vstack([g[x]["V"][:] for x in keys])
        sizes = [g[x]["_ref"].shape[0] for x in keys]

        nc = self.reduced_n_components
        clf = IncrementalPCA(n_components=nc)

        msg = "Performing PCA on {}, ({})->({})"
        print(msg.format(self.method, V.shape[1], nc))

        VX = clf.fit_transform(V)

        # These describe the fitted model as a whole (one entry / row per
        # component), so they are stored unsliced in every group. Slicing
        # them by document count left later groups truncated or empty.
        EVR = clf.explained_variance_ratio_
        COMPONENTS = clf.components_

        start = 0
        for key, size in zip(keys, sizes):
            stop = start + size
            group = g[key]

            # Only the projected rows are aligned with the documents.
            group.create_dataset(
                "VX", data=VX[start:stop, :], **self.h5py_args
            )
            group.create_dataset("VX_explained_variance_ratio_", data=EVR)
            group.create_dataset("VX_components_", data=COMPONENTS)
            start = stop
    finally:
        # Release the file handle even when the PCA or a write fails.
        h5.close()
评论列表
文章目录