def get_manuscript_stats(text, citation_df):
    """
    Compute manuscript statistics.

    Parameters
    ----------
    text:
        The manuscript text; its word count is the number of
        whitespace-separated tokens.
    citation_df:
        Object exposing a ``standard_citation`` series of strings. The part
        of each value before the first ``:`` is treated as the reference
        type.

    Returns
    -------
    collections.OrderedDict
        ``reference_counts``: a ``collections.Counter`` mapping each
        reference type to its number of distinct citations, plus a
        ``total`` entry holding the sum across types.
        ``word_count``: number of whitespace-separated tokens in ``text``.
    """
    stats = collections.OrderedDict()

    # Number of distinct references by type. Duplicates are dropped first so
    # that a reference cited several times is only counted once.
    ref_counts = (
        citation_df
        .standard_citation
        .drop_duplicates()
        # partition() yields the same prefix as split(':')[0] without
        # splitting the remainder of the string.
        .map(lambda citation: citation.partition(':')[0])
        .pipe(collections.Counter)
    )
    # Computed before insertion so 'total' does not count itself.
    ref_counts['total'] = sum(ref_counts.values())
    stats['reference_counts'] = ref_counts

    stats['word_count'] = len(text.split())

    # %-style arguments let the logging framework perform the interpolation.
    logging.info(
        "Generated manuscript stats:\n%s",
        json.dumps(stats, indent=2),
    )
    return stats
评论列表
文章目录