def plot_tendencies(word_list, pos_dic, bin_size, output_dir, file_name):
    """Plot how the occurrences of each word are spread over positions.

    For every word, its positions are bucketed into bins of width
    ``bin_size`` spanning 0 to 100 (the x-axis label describes positions as
    a percentage of the description length), the occurrences per bin are
    counted, and one curve per word is drawn on a single chart that is
    saved to ``output_dir/file_name``.

    Args:
        word_list: Words to plot; each one becomes a column / curve.
        pos_dic: Mapping from word to a collection of positions.
            Presumably percentages in [0, 100] -- values outside the bin
            range are not counted.
        bin_size: Width of a bin; used as the step of ``range``.
        output_dir: Directory the image is written to.
        file_name: Name of the image file inside ``output_dir``.

    Raises:
        Exception: If a word of ``word_list`` is not a key of ``pos_dic``.
    """
    bin_edges = range(0, 100 + bin_size, bin_size)
    # Each bin is labelled with its lower edge.
    bin_labels = range(0, 100, bin_size)

    # Build all the counts before touching matplotlib so that a missing word
    # does not leave a half-initialised figure behind.
    counts_per_word = []
    for word in word_list:
        if word not in pos_dic:
            # f-string rather than '+' so a non-string word still produces
            # this error instead of a TypeError.
            raise Exception(f'Word {word} not found')
        positions = pd.DataFrame(pos_dic[word], columns=['pos'])
        # include_lowest=True keeps a position of exactly 0 in the first
        # bin; with pd.cut's default right-closed intervals it would fall
        # outside (0, bin_size] and be silently dropped.
        positions['bins'] = pd.cut(
            positions['pos'],
            bins=bin_edges,
            labels=bin_labels,
            include_lowest=True,
        )
        # observed=False states explicitly that bins without any occurrence
        # are still reported, so every word shares the same full set of bins
        # whatever the default of the installed pandas version is.
        counts = positions.groupby('bins', observed=False)['bins'].count()
        counts_per_word.append(counts)

    df_final = pd.concat(counts_per_word, axis=1).fillna(0)
    df_final.columns = word_list

    # Draw on an explicitly created figure: DataFrame.plot() without ``ax``
    # opens its own figure, which previously left an extra empty one behind.
    fig, ax = plt.subplots()
    try:
        df_final.plot(ax=ax)
        ax.set_xlabel("Position (en % de la longueur de la description)")
        ax.set_ylabel("Nombre d'occurrences")
        ax.set_title('Position des mots dans les descriptions des offres', y=1.08)
        fig.savefig(os.path.join(output_dir, file_name), bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
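# NOTE: scraped web-page residue, not code ("Comment list" /
# "Table of contents"); kept as a comment so the module still imports.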