def preprocess(self, x, fit=False):
    """Transform each marginal to be as close to a standard Gaussian as possible.

    The transformation is selected by ``self.gaussianize``:

    * ``'standard'`` (default) just subtracts the mean and scales by the std.
    * ``'outliers'`` standardizes the same way and then applies ``g`` to
      squeeze in the outliers.
    * ``'empirical'`` does an empirical gaussianization (rank of each value
      within its column, mapped through ``norm.ppf``); this cannot be
      inverted and a warning is always printed.
    * ``'none'`` or any other choice skips the transformation.

    Parameters
    ----------
    x : array-like
        Data to transform. Statistics are taken along axis 0, so columns are
        treated as the marginals.
    fit : bool, optional
        When True, the mean and std are estimated from ``x`` and stored in
        ``self.theta`` (``'standard'`` and ``'outliers'`` only). When False,
        the previously stored ``self.theta`` is reused.

    Returns
    -------
    The transformed data. If ``self.gpu`` is set and ``fit`` is True the
    result is wrapped in ``cm.CUDAMatrix``; otherwise it is returned as is.

    Side effects
    ------------
    ``self.n_obs`` is overwritten on every call (fit or not), and
    ``self.theta`` is overwritten when fitting in ``'standard'`` or
    ``'outliers'`` mode.
    """
    if self.missing_values is not None:
        x, self.n_obs = mean_impute(x, self.missing_values)  # Creates a copy
    else:
        self.n_obs = len(x)

    mode = self.gaussianize
    if mode == 'standard':
        if fit:
            mean = np.mean(x, axis=0)
            # Deliberately not np.std(x, axis=0, ddof=0): the sum of squares
            # is divided by self.n_obs instead of the row count.
            # NOTE(review): presumably so that imputed entries are not
            # counted as observations -- confirm against mean_impute.
            std = np.sqrt(np.sum((x - mean) ** 2, axis=0) / self.n_obs).clip(1e-10)
            self.theta = (mean, std)
        x = (x - self.theta[0]) / self.theta[1]
        # Test the cheap flag first so the full-array reduction only runs when
        # a warning could actually be printed, and skip it for empty input,
        # where np.max would raise ValueError.
        if self.verbose and np.size(x) > 0 and np.max(np.abs(x)) > 6:
            print("Warning: outliers more than 6 stds away from mean. Consider using gaussianize='outliers'")
    elif mode == 'outliers':
        if fit:
            mean = np.mean(x, axis=0)
            # NOTE(review): this branch uses np.std (row count) while
            # 'standard' divides by self.n_obs. The two agree whenever
            # missing_values is None; confirm the difference under
            # imputation is intended.
            std = np.std(x, axis=0, ddof=0).clip(1e-10)
            self.theta = (mean, std)
        x = g((x - self.theta[0]) / self.theta[1])  # g truncates long tails
    elif mode == 'empirical':
        print("Warning: correct inversion/transform of empirical gauss transform not implemented.")
        # Column by column: rank -> (rank - 0.5) / n -> inverse normal CDF.
        x = np.array([norm.ppf((rankdata(x_i) - 0.5) / len(x_i)) for x_i in x.T]).T
    # 'none' and any unrecognized mode leave x untouched.

    if self.gpu and fit:  # Don't return GPU matrices when only transforming
        x = cm.CUDAMatrix(x)
    return x
评论列表
文章目录