# pd_pivot.py -- source file (Python code snippet)

def titanic_1():
    """Print grouping and pivot-table summaries of seaborn's 'titanic' dataset.

    Loads the dataset with ``sns.load_dataset('titanic')`` and prints, in
    order: the head of the frame, the mean of ``survived`` by ``sex``, the
    same mean by ``sex`` and ``class`` (via ``groupby`` + ``unstack`` and via
    ``pivot_table``), a pivot with an additional age-bin index level, a pivot
    with per-column aggregation functions, and a pivot with margins.

    The function takes no arguments and returns ``None``; its only effect is
    printing. The commented tables below each call are sample outputs.
    """
    titanic = sns.load_dataset('titanic')
    # Single-argument ``print(...)`` is valid on both Python 2 and Python 3.
    print(titanic.head())
    #    survived  pclass     sex   age  ......
    # 0         0       3    male  22.0
    # 1         1       1  female  38.0
    # 2         1       3  female  26.0
    # 3         1       1  female  35.0
    # 4         0       3    male  35.0

    print(titanic.groupby('sex')[['survived']].mean())
    #         survived
    # sex
    # female  0.742038
    # male    0.188908

    print(titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack())
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # Same table as above, expressed as a pivot table (default aggfunc is mean).
    print(titanic.pivot_table('survived', index='sex', columns='class'))
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # Bin ages into (0, 18] and (18, 80] and use the bins as a second index level.
    age = pd.cut(titanic['age'], [0, 18, 80])
    print(titanic.pivot_table('survived', index=['sex', age], columns='class'))
    # class               First    Second     Third
    # sex    age
    # female (0, 18]   0.909091  1.000000  0.511628
    #        (18, 80]  0.972973  0.900000  0.423729
    # male   (0, 18]   0.800000  0.600000  0.215686
    #        (18, 80]  0.375000  0.071429  0.133663

    # Use the string alias 'sum' rather than the builtin ``sum``: recent pandas
    # versions warn when a builtin callable is passed as an aggregation function.
    print(titanic.pivot_table(index='sex', columns='class',
                              aggfunc={'survived': 'sum', 'fare': 'mean'}))

    print(titanic.pivot_table('survived', index='sex', columns='class', margins=True))
    # class      First    Second     Third       All
    # sex
    # female  0.968085  0.921053  0.500000  0.742038
    # male    0.368852  0.157407  0.135447  0.188908
    # All     0.629630  0.472826  0.242363  0.383838