def estimate_1hot_cost(X, is_categorical):
    """
    Calculate the "memory expansion" after applying one-hot encoding.

    For every categorical column the number of distinct finite values
    (NaN and +/-inf are not counted as labels) is determined. One-hot
    encoding replaces such a column by one column per label, so the column
    contributes ``n_labels - 1`` additional columns. The additional columns
    are multiplied by the number of rows and by the item size of ``X``'s
    dtype to obtain a size in bytes.

    :param X: array-like
        The input data array, two-dimensional (rows x features)
    :param is_categorical: boolean array-like
        Array of vector form that indicates
        whether each feature of X is categorical
    :return: int
        Calculated memory size in byte scale (expansion)
    """
    # Accept any array-like (e.g. nested lists), as the contract promises.
    X = np.asarray(X)

    # Only the categorical columns matter; selecting them first avoids running
    # np.unique over every non-categorical column and also copes with the
    # case where no column is selected at all.
    categorical = X[:, is_categorical]

    extra_columns = sum(
        # np.unique may keep NaN/inf entries; isfinite filters them out.
        int(np.count_nonzero(np.isfinite(np.unique(column)))) - 1
        for column in categorical.T
    )

    return extra_columns * X.shape[0] * X.dtype.itemsize
datamanager.py 文件源码
python
阅读 35
收藏 0
点赞 0
评论 0
评论列表
文章目录