def _dense_fit(self, X, strategy, missing_values, axis):
"""Fit the transformer on dense data."""
X = check_array(X, force_all_finite=False)
mask = _get_mask(X, missing_values)
masked_X = ma.masked_array(X, mask=mask)
# Mean
if strategy == "mean":
mean_masked = np.ma.mean(masked_X, axis=axis)
# Avoid the warning "Warning: converting a masked element to nan."
mean = np.ma.getdata(mean_masked)
mean[np.ma.getmask(mean_masked)] = np.nan
return mean
# Median
elif strategy == "median":
if tuple(int(v) for v in np.__version__.split('.')[:2]) < (1, 5):
# In old versions of numpy, calling a median on an array
# containing nans returns nan. This is different is
# recent versions of numpy, which we want to mimic
masked_X.mask = np.logical_or(masked_X.mask,
np.isnan(X))
median_masked = np.ma.median(masked_X, axis=axis)
# Avoid the warning "Warning: converting a masked element to nan."
median = np.ma.getdata(median_masked)
median[np.ma.getmaskarray(median_masked)] = np.nan
return median
# Most frequent
elif strategy == "most_frequent":
# scipy.stats.mstats.mode cannot be used because it will no work
# properly if the first element is masked and if it's frequency
# is equal to the frequency of the most frequent valid element
# See https://github.com/scipy/scipy/issues/2636
# To be able access the elements by columns
if axis == 0:
X = X.transpose()
mask = mask.transpose()
most_frequent = np.empty(X.shape[0])
for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):
row_mask = np.logical_not(row_mask).astype(np.bool)
row = row[row_mask]
most_frequent[i] = _most_frequent(row, np.nan, 0)
return most_frequent
Imputation.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录