def titanic_1():
    """Print grouping and pivot-table examples on the ``titanic`` dataset.

    Loads the ``titanic`` dataset through ``sns.load_dataset`` and prints,
    in order:

    1. the first rows of the frame,
    2. the mean of ``survived`` grouped by ``sex``,
    3. the mean of ``survived`` by ``sex`` and ``class``, once built with
       ``groupby``/``unstack`` and once with ``pivot_table``,
    4. the same table with an extra age-bin level in the row index,
    5. a pivot table using a different aggregation per column,
    6. the ``sex`` x ``class`` table with margin totals.

    Relies on the names ``sns`` and ``pd`` being defined at module level
    (presumably seaborn and pandas; the imports are outside this block).
    Returns nothing; every result is written to stdout.

    The commented tables below each call are sample output kept from the
    original author; exact figures may vary with dataset/pandas version.
    """
    titanic = sns.load_dataset('titanic')

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(titanic.head())
    #    survived  pclass     sex   age  ......
    # 0         0       3    male  22.0
    # 1         1       1  female  38.0
    # 2         1       3  female  26.0
    # 3         1       1  female  35.0
    # 4         0       3    male  35.0

    # Mean of the 0/1 'survived' column per sex.
    print(titanic.groupby('sex')[['survived']].mean())
    #         survived
    # sex
    # female  0.742038
    # male    0.188908

    # Two-key groupby, then unstack moves 'class' from the index to columns.
    print(titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack())
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # Same table via pivot_table (sample output matches the groupby version).
    print(titanic.pivot_table('survived', index='sex', columns='class'))
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # Bin ages into (0, 18] and (18, 80] and use the bins as a second
    # row-index level.
    age = pd.cut(titanic['age'], [0, 18, 80])
    print(titanic.pivot_table('survived', ['sex', age], 'class'))
    # class               First    Second     Third
    # sex    age
    # female (0, 18]   0.909091  1.000000  0.511628
    #        (18, 80]  0.972973  0.900000  0.423729
    # male   (0, 18]   0.800000  0.600000  0.215686
    #        (18, 80]  0.375000  0.071429  0.133663

    # Per-column aggregation. The string 'sum' is used instead of the
    # builtin ``sum``: pandas >= 2.1 warns that builtin callables will stop
    # being mapped to the groupby implementation, and the string spelling
    # keeps the current behaviour on both old and new pandas.
    print(titanic.pivot_table(index='sex', columns='class',
                              aggfunc={'survived': 'sum', 'fare': 'mean'}))

    # margins=True appends an 'All' row and column with the totals.
    print(titanic.pivot_table('survived', index='sex', columns='class', margins=True))
    # class      First    Second     Third       All
    # sex
    # female  0.968085  0.921053  0.500000  0.742038
    # male    0.368852  0.157407  0.135447  0.188908
    # All     0.629630  0.472826  0.242363  0.383838
评论列表
文章目录