def create_bins(data, n_bins):
    """
    Build a list of bins out of the values found in ``data``.

    Values for which ``is_number_and_nan`` is true are ignored when counting
    distinct values. If fewer distinct values remain than ``n_bins``, a
    warning is emitted and ``n_bins`` is lowered to that count. Data whose
    numpy dtype is numeric is handed to ``_create_numerical_bins``; anything
    else is handed to ``_create_categorical_bins``.

    :param data: a list or a 1-dim array of data to determine the bins
    :param n_bins: number of bins to create
    :return: a list of Bin object
    :raises ValueError: if ``data`` is None or empty, or if ``n_bins`` is
        zero or negative
    """
    # Guard clauses: reject unusable arguments before doing any work.
    if data is None or len(data) <= 0:
        raise ValueError('Empty input array!')
    if n_bins <= 0:
        raise ValueError('Less than one bin makes no sense.')

    # Count the distinct values, leaving out NaN entries.
    non_nan_values = [item for item in data if not is_number_and_nan(item)]
    distinct_count = len(np.unique(non_nan_values))

    # Cannot build more bins than there are distinct values: cap the request.
    was_capped = distinct_count < n_bins
    if was_capped:
        warnings.warn(
            "Insufficient unique values for requested number of bins. "
            "Number of bins will be reset to number of unique values."
        )
        n_bins = distinct_count

    # Converting to a numpy array lets numpy infer a common dtype, which
    # decides between numerical and categorical binning.
    values = np.array(data)
    if np.issubdtype(values.dtype, np.number):
        build_bins = _create_numerical_bins
    else:
        build_bins = _create_categorical_bins
    bins = build_bins(values, n_bins)

    # Only report a shortfall when the caller has not already been warned
    # about the capped bin count.
    if not was_capped and len(bins) < n_bins:
        warnings.warn('Created less bins than requested.')
    return bins
# ------- private methods for numerical binning ------- #
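# Comment list       (page-navigation text carried over from the source web page)
# Table of contents  (page-navigation text carried over from the source web page)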