def describe_numeric_1d(series, **kwargs):
    """Compute descriptive statistics for a one-dimensional numeric series.

    Args:
        series: A pandas Series of numeric values. Missing values are
            skipped by the pandas reductions used here and are dropped
            explicitly before the quantiles are computed.
        **kwargs: Forwarded unchanged to ``histogram`` and
            ``mini_histogram``.

    Returns:
        A ``pd.Series`` named after ``series`` whose index holds the
        statistic names: ``mean``, ``std``, ``variance``, ``min``, ``max``,
        ``range``, one entry per quantile (labelled by ``pretty_name``),
        ``iqr``, ``kurtosis``, ``skewness``, ``sum``, ``mad``, ``cv``,
        ``type`` (always ``"NUM"``), ``n_zeros``, ``p_zeros``,
        ``histogram`` and ``mini_histogram``.
    """
    stats = {
        'mean': series.mean(),
        'std': series.std(),
        'variance': series.var(),
        'min': series.min(),
        'max': series.max(),
    }
    stats['range'] = stats['max'] - stats['min']

    # Dropping NaNs is a workaround for
    # https://github.com/pydata/pandas/issues/13098; it is loop-invariant,
    # so do it once instead of once per quantile.
    non_null = series.dropna()
    for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
        stats[pretty_name(x)] = non_null.quantile(x)
    # Relies on pretty_name labelling 0.75 / 0.25 as '75%' / '25%'.
    stats['iqr'] = stats['75%'] - stats['25%']

    stats['kurtosis'] = series.kurt()
    stats['skewness'] = series.skew()
    stats['sum'] = series.sum()
    # Mean absolute deviation around the mean. Spelled out because
    # Series.mad() was removed in pandas 2.0; this form works on all versions.
    stats['mad'] = (series - stats['mean']).abs().mean()
    # A zero mean would make the coefficient of variation undefined.
    stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.nan
    stats['type'] = "NUM"

    n_values = len(series)
    stats['n_zeros'] = n_values - np.count_nonzero(series)
    # An empty series has no meaningful zero ratio; report NaN rather than
    # raising ZeroDivisionError.
    stats['p_zeros'] = stats['n_zeros'] / n_values if n_values else np.nan

    # Histograms
    stats['histogram'] = histogram(series, **kwargs)
    stats['mini_histogram'] = mini_histogram(series, **kwargs)
    return pd.Series(stats, name=series.name)
评论列表
文章目录