def get_numerical_features(fnum, fname, df, nvalues, dt,
                           sentinel, logt, plevel):
    r"""Impute a numerical column and optionally log-transform it.

    The column ``fname`` is read from ``df`` and handed to
    ``impute_values`` together with ``dt`` and ``sentinel``. When
    ``logt`` is set and every imputed value is strictly positive, a
    normality test (``sps.normaltest``) is run; if its p-value does not
    exceed ``plevel`` the natural logarithm of the values is returned
    in place of the imputed values.

    Parameters
    ----------
    fnum : int
        Feature number, used only in log messages.
    fname : str
        Name of the numerical column in the dataframe ``df``.
    df : pandas.DataFrame
        Dataframe containing the column ``fname``.
    nvalues : int
        The number of unique values. It is compared with the column
        length only to choose which log message is written.
    dt : str
        The values ``'float64'``, ``'int64'``, or ``'bool'``; passed
        through to ``impute_values``.
    sentinel : float
        The number to be imputed for NaN values; passed through to
        ``impute_values``.
    logt : bool
        If ``True``, then the log-transform is considered.
    plevel : float
        The p-value threshold; a p-value at or below it is treated as
        "not normally distributed".

    Returns
    -------
    new_values : numpy array
        The imputed values, log-transformed when the conditions above
        are met.

    """
    column = df[fname]

    # Pick the log template first so there is a single logging call.
    if len(column) == nvalues:
        template = "Feature %d: %s is a numerical feature of type %s with maximum number of values %d"
    else:
        template = "Feature %d: %s is a numerical feature of type %s with %d unique values"
    logger.info(template, fnum, fname, dt, nvalues)

    # Imputation for float, integer, or boolean data types.
    new_values = impute_values(column, dt, sentinel)

    # The logarithm is only attempted on request and only when every
    # value is strictly positive.
    if not logt or not np.all(new_values > 0):
        return new_values

    # Only the p-value of the normality test is needed.
    _, pvalue = sps.normaltest(new_values)
    if pvalue <= plevel:
        logger.info("Feature %d: %s is not normally distributed [p-value: %f]",
                    fnum, fname, pvalue)
        new_values = np.log(new_values)
    return new_values
#
# Function get_polynomials
#
# (Web-page residue, not part of the module: "comment list" / "article table of contents".)