binning.py 文件源码

python
阅读 33 收藏 0 点赞 0 评论 0

项目:expan 作者: zalando 项目源码 文件源码
def create_bins(data, n_bins):
    """
    Build bins covering the given data values.

    Numeric input (as inferred by numpy from the data) is delegated to
    ``_create_numerical_bins``; everything else goes to
    ``_create_categorical_bins``.

    :param data: a list or a 1-dim array of data to determine the bins
    :param n_bins: requested number of bins; lowered (with a warning) to the
                   count of distinct non-NaN values when there are fewer of
                   those than requested
    :return: the bins produced by the selected helper (a list of Bin objects)
    :raises ValueError: if data is None or empty, or if n_bins is not positive
    """
    # Guard clauses: reject unusable input before doing any work.
    if data is None or len(data) <= 0:
        raise ValueError('Empty input array!')
    if n_bins <= 0:
        raise ValueError('Less than one bin makes no sense.')

    # NaN entries are excluded when counting distinct values.
    non_nan_items = [item for item in data if not is_number_and_nan(item)]
    distinct_count = len(np.unique(non_nan_items))

    too_few_distinct = distinct_count < n_bins
    if too_few_distinct:
        warnings.warn("Insufficient unique values for requested number of bins. "
                      "Number of bins will be reset to number of unique values.")
        n_bins = distinct_count

    # Let numpy infer the dtype; it decides which binning strategy applies.
    values = np.array(data)
    if np.issubdtype(values.dtype, np.number):
        binner = _create_numerical_bins
    else:
        binner = _create_categorical_bins
    bins = binner(values, n_bins)

    # The caller has already been warned when the bin count was reduced, so
    # the shortfall check is skipped entirely in that case.
    if too_few_distinct:
        return bins
    if len(bins) < n_bins:
        warnings.warn('Created less bins than requested.')
    return bins


# ------- private methods for numerical binning ------- #
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号