def missing_rate_plot(consensus_data, ordered_genomes, biotypes, missing_plot_tgt):
    """Plot the number of missing genes and transcripts per genome.

    Writes a multi-page PDF to ``missing_plot_tgt``. The first page is drawn
    from the data for all biotypes; it is followed by one page for every entry
    of ``biotypes`` for which ``biotype_filter`` returns a data frame.

    :param consensus_data: input passed to ``json_biotype_counter_to_df`` to
        extract the 'Gene Missing' and 'Transcript Missing' counters.
    :param ordered_genomes: genome ordering, forwarded to ``generic_barplot``
        as ``row_order``.
    :param biotypes: iterable of biotypes that each get their own page.
    :param missing_plot_tgt: output target; its ``open('w')`` must return a
        context manager yielding a writable handle usable by ``PdfPages``.
    """
    base_title = 'Number of missing orthologs in consensus set'
    ylabel = 'Number of genes or transcripts'

    # The renames assume the helper returns exactly three columns ordered as
    # (biotype, count, genome) -- TODO confirm against json_biotype_counter_to_df.
    gene_missing_df = json_biotype_counter_to_df(consensus_data, 'Gene Missing')
    gene_missing_df.columns = ['biotype', 'Genes', 'genome']
    transcript_missing_df = json_biotype_counter_to_df(consensus_data, 'Transcript Missing')
    transcript_missing_df.columns = ['biotype', 'Transcripts', 'genome']

    # Inner join (the merge default): only genome/biotype pairs present in
    # both tables survive.
    df = transcript_missing_df.merge(gene_missing_df, on=['genome', 'biotype'])
    # Long format: 'variable' holds 'Transcripts'/'Genes', 'value' the count.
    df = pd.melt(df, id_vars=['biotype', 'genome'])

    with missing_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        # Sum only the 'value' column instead of passing the builtin ``sum``
        # to aggregate(): newer pandas deprecates builtin callables here and
        # no longer silently drops non-numeric columns.
        # NOTE(review): 'biotype' is part of the grouping key, so rows are not
        # collapsed across biotypes on this page -- confirm that
        # generic_barplot handles several rows per genome as intended.
        tot_df = df.groupby(['genome', 'biotype', 'variable'])['value'].sum().reset_index()
        generic_barplot(tot_df, pdf, '', ylabel, base_title, x='genome', y='value',
                        col='variable', row_order=ordered_genomes)
        for biotype in biotypes:
            biotype_df = biotype_filter(df, biotype)
            if biotype_df is None:
                # biotype_filter signals "nothing to plot" with None.
                continue
            # Restricting to 'value' keeps the string 'biotype' column out of
            # the summation.
            biotype_df = biotype_df.groupby(['genome', 'variable'])['value'].sum().reset_index()
            title = base_title + ' for biotype {}'.format(biotype)
            generic_barplot(biotype_df, pdf, '', ylabel, title, x='genome', y='value',
                            col='variable', row_order=ordered_genomes)
plots.py 文件源码
python
阅读 32
收藏 0
点赞 0
评论 0
评论列表
文章目录