# Assumed module-level imports (not shown in this excerpt):
#   import numpy as np
#   from scipy import special, stats
#   plus the project's identify_types and normalization modules

def preprocess_feature(self, feature, parameters):
    # Mask of entries that hold a real value rather than the missing-value sentinel.
    is_not_empty = 1 - np.isclose(feature, normalization.MISSING_VALUE)
    if parameters.feature_type == identify_types.BINARY:
        # Binary features are always 1 unless they are 0.
        return ((feature != 0) * is_not_empty).astype(np.float32)
    if parameters.boxcox_lambda is not None:
        # Shift into the positive range, then apply the Box-Cox transform.
        feature = stats.boxcox(
            np.maximum(
                feature + parameters.boxcox_shift,
                normalization.BOX_COX_MARGIN,
            ),
            parameters.boxcox_lambda,
        )
    # No *= to ensure consistent out-of-place operation.
    if parameters.feature_type == identify_types.PROBABILITY:
        # Clip away exact 0s and 1s so the logit stays finite.
        feature = np.clip(feature, 0.01, 0.99)
        feature = special.logit(feature)
    elif parameters.feature_type == identify_types.QUANTILE:
        # Map each value to the fraction of quantile boundaries it meets or exceeds.
        quantiles = parameters.quantiles
        values = np.zeros(feature.shape)
        for quantile in quantiles:
            values += feature >= quantile
        feature = values / float(len(quantiles))
    elif parameters.feature_type == identify_types.ENUM:
        # One-hot encode: one output column per possible value.
        possible_values = parameters.possible_values
        mapping = {}
        for i, possible_value in enumerate(possible_values):
            mapping[possible_value] = i
        output_feature = np.zeros((len(feature), len(possible_values)))
        for i, val in enumerate(feature):
            output_feature[i][mapping[val]] = 1.0
        return output_feature
    else:
        # Continuous features: standardize to zero mean and unit variance.
        feature = feature - parameters.mean
        feature /= parameters.stddev
    # Zero out entries that were missing in the input.
    feature *= is_not_empty
    return feature
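As a rough illustration of how this method is meant to be driven, here is a minimal sketch for the default (continuous) branch. The FakeParameters namedtuple, the MISSING_VALUE constant, and the preprocessor instance are stand-ins I am assuming for this sketch; the real parameter object and sentinel live in the project's normalization code and are not shown in the excerpt.

import collections
import numpy as np

# Hypothetical stand-in for the project's normalization-parameters object.
FakeParameters = collections.namedtuple(
    "FakeParameters",
    ["feature_type", "boxcox_lambda", "boxcox_shift",
     "quantiles", "possible_values", "mean", "stddev"],
)
MISSING_VALUE = -1337.0  # assumed sentinel; the real value is defined in normalization

params = FakeParameters(
    feature_type="CONTINUOUS",   # anything not BINARY/PROBABILITY/QUANTILE/ENUM hits the else branch
    boxcox_lambda=None, boxcox_shift=None,
    quantiles=None, possible_values=None,
    mean=10.0, stddev=2.0,
)
raw = np.array([8.0, 12.0, MISSING_VALUE], dtype=np.float32)
# preprocessor.preprocess_feature(raw, params) would return roughly
# [-1.0, 1.0, 0.0]: real values are standardized, the missing entry is zeroed out.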