def tsne_cluster_cuisine(df,sublist):
lenlist=[0]
df_sub = df[df['cuisine']==sublist[0]]
lenlist.append(df_sub.shape[0])
for cuisine in sublist[1:]:
temp = df[df['cuisine']==cuisine]
df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
lenlist.append(df_sub.shape[0])
df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
print df_X.shape, lenlist
dist = squareform(pdist(df_X, metric='cosine'))
tsne = TSNE(metric='precomputed').fit_transform(dist)
palette = sns.color_palette("hls", len(sublist))
plt.figure(figsize=(10,10))
for i,cuisine in enumerate(sublist):
plt.scatter(tsne[lenlist[i]:lenlist[i+1],0],\
tsne[lenlist[i]:lenlist[i+1],1],c=palette[i],label=sublist[i])
plt.legend()
#interactive plot with boken; set up for four categories, with color palette; pass in df for either ingredient or flavor
评论列表
文章目录