def global_stats(articles: pd.DataFrame) -> None:
    """Print global summary statistics for the article database.

    The following figures are written to standard output, one per line:

    * the number of rows in ``articles``;
    * the number of distinct values in the ``base_url`` column
      (NaN entries are not counted as a source);
    * the mean of the ``word_count`` column;
    * the number of rows whose ``authors``, ``title`` and ``text``
      columns are equal to the empty string.

    Only exact empty strings are treated as "missing"; NaN/None values in
    those columns are not included in the missing counts.

    Args:
        articles: Frame providing the ``base_url``, ``word_count``,
            ``authors``, ``title`` and ``text`` columns.

    Raises:
        KeyError: If one of the columns listed above is absent.
    """
    print(f'Number of articles: {len(articles):,}')

    # Series.nunique() counts distinct non-NaN values directly. It replaces
    # the deprecated top-level pd.value_counts(), which also built a full
    # frequency table only to have its length taken.
    num_sources = articles['base_url'].nunique()
    print(f'Number of news sources: {num_sources}')

    mean_wc = articles['word_count'].mean()
    print(f'Global mean word count: {mean_wc:.1f}')

    # Summing a boolean mask counts the rows where the comparison holds.
    missing_authors = (articles['authors'] == '').sum()
    print(f'Missing authors: {missing_authors:,}')

    missing_titles = (articles['title'] == '').sum()
    print(f'Missing titles: {missing_titles}')

    missing_texts = (articles['text'] == '').sum()
    print(f'Missing texts: {missing_texts:,}')
评论列表
文章目录