def flavor_profile(df,ingr,comp,ingr_comp):
    """Project a recipe/ingredient matrix onto flavor compounds.

    Builds an ingredient-by-compound count table from the three lookup
    frames and multiplies ``df`` by it, so each output row holds, per
    compound, the sum over ingredients of (recipe value * number of
    ingredient/compound links).

    Parameters:
        df: DataFrame whose columns are ingredient names. Spaces in the
            column names are replaced by underscores before they are matched
            against the ``'ingredient name'`` values.
        ingr: frame joined via its index against ``ingr_comp['# ingredient
            id']``; must supply an ``'ingredient name'`` column.
        comp: frame joined via its index against the ``'compound id'``
            column.
        ingr_comp: link table with ``'# ingredient id'`` and
            ``'compound id'`` columns.

    Returns:
        numpy array of shape ``(len(df), number of compound columns in the
        count table)``.

    Side effects:
        Prints two diagnostic lines (name counts, then input/output shapes).
    """
    # The lookup tables are matched on names that use '_' where the recipe
    # columns use ' '.
    underscore_ingredients = [name.replace(' ', '_') for name in df.columns]
    print("%d %d" % (len(underscore_ingredients), len(df.columns)))

    ingr_total = ingr_comp.join(ingr, how='right', on='# ingredient id')
    ingr_total = ingr_total.join(comp, how='right', on='compound id')
    ingr_pivot = pd.crosstab(ingr_total['ingredient name'],
                             ingr_total['compound id'])

    # Align pivot rows one-to-one with df's columns, in df's column order.
    # A plain isin() filter keeps the pivot's own (sorted) order and drops
    # ingredients that have no compound data, so the matrix product below
    # would either pair columns with the wrong rows or fail on a shape
    # mismatch. Ingredients with no known compounds get an all-zero row.
    ingr_flavor = ingr_pivot.reindex(index=underscore_ingredients,
                                     fill_value=0)

    df_flavor = df.values.dot(ingr_flavor.values)
    print("%s %s" % (df.shape, df_flavor.shape))
    return df_flavor
# Normalize the flavor matrix with the TF-IDF method
评论列表
文章目录