def calculate_word_count_stats(
    articles: pd.DataFrame, *, top_n: int = 10
) -> None:
    """Print and plot per-source word count statistics.

    Groups ``articles`` by ``base_url`` and computes, for every source, the
    number of ``word_count`` values together with their mean and standard
    deviation. The table is sorted by count (largest first) and handed to
    the module's ``print_full`` helper. Two plots are then shown for the
    ``top_n`` sources with the highest counts: a bar plot of the counts and
    a box plot of the ``word_count`` distribution.

    Args:
        articles: Frame with at least a ``base_url`` column (the grouping
            key) and a ``word_count`` column.
        top_n: How many of the largest sources to plot. Defaults to 10,
            the value that used to be hard-coded.
    """
    # ``sns.plt`` was only an alias for ``matplotlib.pyplot`` and is not
    # available in current seaborn releases, so use pyplot directly.
    import matplotlib.pyplot as plt

    by_source = (
        articles.groupby(['base_url'])['word_count']
        .agg(['count', 'mean', 'std'])
        .sort_values('count', ascending=False)
    )
    print_full(by_source)

    # The table is already sorted by count, so its first rows are the
    # top sources; no membership mask is needed to select them.
    top_counts = by_source.head(top_n).reset_index()
    top_sources = list(top_counts['base_url'])

    sns.barplot(x='base_url', y='count', data=top_counts)
    plt.show()

    # Pin the category order so the box plot lists the sources in the same
    # order as the bar plot instead of by first appearance in ``articles``.
    sns.boxplot(
        x='base_url',
        y='word_count',
        data=articles[articles['base_url'].isin(top_sources)],
        order=top_sources,
    )
    plt.show()
# Comments
# Article contents