def _short_style(style):
    """Reduce a raw style label to a single lower-case keyword."""
    text = str(style).lower().replace('art ', '').replace(' art', '')
    # Keep only the last space-separated word, then strip parentheses.
    last_word = text.split(' ')[-1]
    return last_word.replace('(', '').replace(')', '')


def generate_overlaps(all_info):
    """Compute pairwise style-overlap scores between artists.

    A ``short_style`` column is derived from ``all_info['style']`` and is
    written onto ``all_info`` itself (the caller's frame is modified in
    place).  Rows are then counted per (artist, short_style), each artist's
    counts are normalised to fractions that sum to 1, and the overlap of two
    artists is the dot product of their fraction vectors.

    Args:
        all_info: DataFrame with at least ``artist`` and ``style`` columns.

    Returns:
        A tuple ``(overlaps, artists)`` where ``overlaps`` is an ``(n, n)``
        symmetric float array whose diagonal is fixed at 1.0, and ``artists``
        is the index of artist labels giving the row/column order.
    """
    all_info['short_style'] = all_info['style'].apply(_short_style)

    counts = (
        all_info.groupby(['artist', 'short_style'])
        .size()
        .reset_index(name='count')
    )
    table = pd.pivot_table(
        counts,
        index='artist',
        columns='short_style',
        values='count',
        fill_value=0,
    )

    # NOTE(review): the first style column is dropped, as in the original
    # code; the reason is not visible here (possibly an empty/placeholder
    # style) -- confirm this is intended.
    kept = table.iloc[:, 1:]
    fractions = 1.0 * kept.div(kept.sum(axis=1), axis=0)

    # One matrix product yields every pairwise dot product, including the
    # pairs involving the first artist, which the old loop (starting at
    # index 1) never filled in.
    values = fractions.values
    overlaps = np.dot(values, values.T)
    # Self-overlap is defined as 1.0 rather than the row's self dot product.
    np.fill_diagonal(overlaps, 1.0)
    return overlaps, fractions.index
plot_artist_style_overlaps.py 文件源码
python
阅读 41
收藏 0
点赞 0
评论 0
评论列表
文章目录