transforms.py 文件源码-python代码片段

def normalize_padded(padded, means=None, stds=None):
    """Normalize by last dim of padded with means/stds or calculate them.

        .. TODO::
           * consider importing instead ex:

                from sklearn.preprocessing import StandardScaler, RobustScaler
                robust_scaler = RobustScaler()
                x_train = robust_scaler.fit_transform(x_train)
                x_test  = robust_scaler.transform(x_test)
                ValueError: Found array with dim 3. RobustScaler expected <= 2.

           * Don't normalize binary features
           * If events are sparse then this may lead to huge values.
    """
    # TODO epsilon choice is random
    epsilon = 1e-6
    original_dtype = padded.dtype

    is_flat = len(padded.shape) == 2
    if is_flat:
        padded = np.expand_dims(padded, axis=-1)

    n_features = padded.shape[2]
    n_obs = padded.shape[0] * padded.shape[1]

    if means is None:
        means = np.nanmean(np.float128(
            padded.reshape(n_obs, n_features)), axis=0)

    means = means.reshape([1, 1, n_features])
    padded = padded - means

    if stds is None:
        stds = np.nanstd(np.float128(
            padded.reshape(n_obs, n_features)), axis=0)

    stds = stds.reshape([1, 1, n_features])
    if (stds < epsilon).any():
        print('warning. Constant cols: ', np.where((stds < epsilon).flatten()))
        stds[stds < epsilon] = 1.0
        # should be (small number)/1.0 as mean is subtracted.
        # Possible prob depending on machine err

    # 128 float cast otherwise
    padded = (padded / stds).astype(original_dtype)

    if is_flat:
        # Return to flat
        padded = np.squeeze(padded)
    return padded, means, stds