def normalize_padded(padded, means=None, stds=None):
    """Standardize each feature (last axis) of a padded array.

    Every feature is shifted by its mean and divided by its standard
    deviation. Statistics that are not supplied are estimated from
    ``padded`` itself, pooling the first two axes and ignoring NaN
    entries (``np.nanmean`` / ``np.nanstd``).

    :param padded: array of shape ``(d0, d1, n_features)``, or a 2-D array
        ``(d0, d1)`` which is treated as having a single feature.
    :param means: optional per-feature means (anything reshapeable to
        ``n_features`` values). Estimated from ``padded`` when ``None``.
    :param stds: optional per-feature standard deviations (same layout as
        ``means``). Estimated from the mean-subtracted data when ``None``.
        The caller's array is never modified.
    :return: tuple ``(normalized, means, stds)``. ``normalized`` has the
        shape and dtype of the input ``padded`` (so an integer input is
        cast back to integer); ``means`` and ``stds`` have shape
        ``(1, 1, n_features)``. Features whose std is below ``1e-6`` are
        reported with ``print`` and divided by ``1.0`` instead.

    .. TODO::
        * Consider importing a scaler instead, e.g.
          ``sklearn.preprocessing.StandardScaler`` / ``RobustScaler``
          (``fit_transform`` on train, ``transform`` on test). Attempted,
          but it fails on 3-D input with
          ``ValueError: Found array with dim 3. RobustScaler expected <= 2.``
        * Don't normalize binary features.
        * If events are sparse then this may lead to huge values.
    """
    # TODO epsilon choice is random
    epsilon = 1e-6
    original_dtype = padded.dtype

    is_flat = padded.ndim == 2
    if is_flat:
        # Treat a 2-D input as having one trailing feature axis.
        padded = np.expand_dims(padded, axis=-1)

    n_features = padded.shape[2]
    n_obs = padded.shape[0] * padded.shape[1]

    if means is None:
        # Accumulate in extended precision. np.longdouble is used rather
        # than np.float128 because the latter is missing on platforms
        # without an extended long double type.
        means = np.nanmean(
            np.asarray(padded.reshape(n_obs, n_features),
                       dtype=np.longdouble),
            axis=0)
    means = np.asarray(means).reshape([1, 1, n_features])
    padded = padded - means

    if stds is None:
        stds = np.nanstd(
            np.asarray(padded.reshape(n_obs, n_features),
                       dtype=np.longdouble),
            axis=0)
    stds = np.asarray(stds).reshape([1, 1, n_features])

    constant_cols = stds < epsilon
    if constant_cols.any():
        print('warning. Constant cols: ', np.where(constant_cols.flatten()))
        # Copy first: reshape may return a view of the caller's array and
        # the replacement below must not leak back into it.
        stds = stds.copy()
        # should be (small number)/1.0 as mean is subtracted.
        # Possible prob depending on machine err
        stds[constant_cols] = 1.0

    # The division may have promoted to extended precision; cast back.
    padded = (padded / stds).astype(original_dtype)

    if is_flat:
        # Return to flat: drop only the feature axis added above, so that
        # genuine length-1 dimensions of the input are preserved.
        padded = np.squeeze(padded, axis=-1)
    return padded, means, stds
评论列表
文章目录