base.py 文件源码

python
阅读 38 收藏 0 点赞 0 评论 0

项目:pandas-profiling 作者: JosPolfliet 项目源码 文件源码
def describe_numeric_1d(series, **kwargs):
    stats = {'mean': series.mean(), 'std': series.std(), 'variance': series.var(), 'min': series.min(),
            'max': series.max()}
    stats['range'] = stats['max'] - stats['min']

    for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
        stats[pretty_name(x)] = series.dropna().quantile(x) # The dropna() is a workaround for https://github.com/pydata/pandas/issues/13098
    stats['iqr'] = stats['75%'] - stats['25%']
    stats['kurtosis'] = series.kurt()
    stats['skewness'] = series.skew()
    stats['sum'] = series.sum()
    stats['mad'] = series.mad()
    stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.NaN
    stats['type'] = "NUM"
    stats['n_zeros'] = (len(series) - np.count_nonzero(series))
    stats['p_zeros'] = stats['n_zeros'] / len(series)
    # Histograms
    stats['histogram'] = histogram(series, **kwargs)
    stats['mini_histogram'] = mini_histogram(series, **kwargs)
    return pd.Series(stats, name=series.name)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号