def calculate_plate_summaries(self):
    """Get mean reads, percent mapping, etc. summaries for each plate.

    Cells are grouped by the ``Plates.SAMPLE_MAPPING`` column of
    ``self.cell_metadata`` and one value per group is computed for every
    statistic.

    Each metadata column is selected *before* it is aggregated. Aggregating
    the whole frame first (``groupby.sum()['col']``) does redundant work and,
    on pandas versions that no longer silently drop non-numeric columns,
    fails as soon as the metadata contains one.

    Returns
    -------
    pandas.DataFrame
        One row per group, with columns in this order:
        ``Plates.MEAN_READS_PER_CELL``, ``Plates.MEDIAN_GENES_PER_CELL``,
        ``'Percent not uniquely aligned'``, ``Plates.PERCENT_MAPPED_READS``,
        ``'Percent no feature'``, ``'Percent Rn45s'``,
        ``Plates.PERCENT_ERCC`` and ``'n_wells'``.
    """
    # Per-cell group label, used to group frames other than cell_metadata.
    # NOTE(review): grouping by this Series aligns on index, so this assumes
    # self.mapping_stats and self.genes are indexed like self.cell_metadata
    # -- confirm against the code that builds them.
    plate_ids = self.cell_metadata[Plates.SAMPLE_MAPPING]
    well_map = self.cell_metadata.groupby(Plates.SAMPLE_MAPPING)
    n_wells = well_map.size()

    # these stats are from STAR mapping
    star_cols = ['Number of input reads', 'Uniquely mapped reads number']
    star_stats = self.mapping_stats[star_cols].groupby(plate_ids).sum()
    total_reads = star_stats['Number of input reads']
    unique_reads = star_stats['Uniquely mapped reads number']

    # Fractions of each group's total input reads.
    percent_ercc = well_map['ercc'].sum().divide(total_reads, axis=0)
    percent_not_unique = well_map['alignment_not_unique'].sum().divide(
        total_reads, axis=0)
    percent_no_feature = well_map['no_feature'].sum().divide(
        total_reads, axis=0)
    percent_rn45s = (
        self.genes['Rn45s'].groupby(plate_ids).sum() / total_reads)

    # The ERCC fraction is subtracted from the uniquely mapped fraction.
    percent_mapped_reads = unique_reads / total_reads - percent_ercc

    plate_summaries = pd.DataFrame(OrderedDict([
        (Plates.MEAN_READS_PER_CELL, total_reads / n_wells),
        (Plates.MEDIAN_GENES_PER_CELL, well_map['n_genes'].median()),
        ('Percent not uniquely aligned', 100 * percent_not_unique),
        (Plates.PERCENT_MAPPED_READS, 100 * percent_mapped_reads),
        ('Percent no feature', 100 * percent_no_feature),
        ('Percent Rn45s', 100 * percent_rn45s),
        (Plates.PERCENT_ERCC, 100 * percent_ercc),
        ('n_wells', n_wells),
    ]))
    return plate_summaries
评论列表
文章目录