def calculate_improvement_metrics(final_consensus, scored_df, tm_eval_df, hgm_df, metrics):
    """For coding transcripts, how much did we improve the metrics?

    Fills ``metrics['Evaluation Improvement']`` in place with two entries:

    * ``'unchanged'`` -- number of ``protein_coding`` consensus entries whose
      ``transcript_modes`` contains ``'transMap'``.
    * ``'changes'`` -- one 8-item list per remaining ``protein_coding`` entry, in the order
      ``[TransMapOriginalIntronsPercent, IntronAnnotSupportPercentTransMap,
      IntronRnaSupportPercentTransMap, OriginalIntronsPercent_mRNA,
      IntronAnnotSupportPercent, IntronRnaSupportPercent, TransMapGoodness,
      AlnGoodness_mRNA]``.

    :param final_consensus: iterable of ``(alignment_id, attributes)`` pairs, where ``attributes``
        supports lookup of ``'transcript_biotype'`` and ``'transcript_modes'``.
    :param scored_df: DataFrame that, after ``reset_index()``, exposes ``TranscriptId`` and
        ``AlignmentId`` plus the un-suffixed metric columns read below.
        NOTE(review): the exact schema is not visible here -- confirm against the caller.
    :param tm_eval_df: DataFrame that, after ``reset_index()``, exposes ``TranscriptId`` and
        ``TransMapOriginalIntronsPercent``.
    :param hgm_df: DataFrame with ``AlignmentId``, ``TranscriptId``, ``IntronAnnotSupportPercent``
        and ``IntronRnaSupportPercent`` columns.
    :param metrics: dict that is mutated; any existing ``'Evaluation Improvement'`` entry is replaced.
    :return: None
    :raises KeyError: if a non-transMap ``protein_coding`` alignment ID is absent from the merged table.
    """
    tm_df = tm_eval_df.reset_index()[['TransMapOriginalIntronsPercent', 'TranscriptId']]

    # Keep only the rows whose alignment ID is classified as transMap by the project helper.
    is_transmap = hgm_df['AlignmentId'].apply(tools.nameConversions.aln_id_is_transmap)
    hgm_df_subset = hgm_df[is_transmap]
    hgm_df_subset = hgm_df_subset[['TranscriptId', 'IntronAnnotSupportPercent', 'IntronRnaSupportPercent']]
    tm_df = pd.merge(tm_df, hgm_df_subset, on='TranscriptId')

    # Column names shared by both sides get the 'TransMap' suffix on the left-hand (transMap)
    # copy, while the right-hand (scored_df) copy keeps its original name.
    df = pd.merge(tm_df, scored_df.reset_index(), on='TranscriptId', suffixes=('TransMap', ''))

    # The original author was unsure why duplicates appear here. Presumably the merges on
    # TranscriptId fan out when a transcript has several rows on either side -- TODO confirm.
    # De-duplicating keeps the index unique, so each lookup below yields a single row.
    df = df.drop_duplicates(subset='AlignmentId')
    df = df.set_index('AlignmentId')

    improvement = {'changes': [], 'unchanged': 0}
    metrics['Evaluation Improvement'] = improvement
    for aln_id, c in final_consensus:
        if c['transcript_biotype'] != 'protein_coding':
            continue
        if 'transMap' in c['transcript_modes']:
            improvement['unchanged'] += 1
            continue
        # .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
        # The lookup is by AlignmentId label.
        tx_s = df.loc[aln_id]
        improvement['changes'].append([tx_s.TransMapOriginalIntronsPercent,
                                       tx_s.IntronAnnotSupportPercentTransMap,
                                       tx_s.IntronRnaSupportPercentTransMap,
                                       tx_s.OriginalIntronsPercent_mRNA,
                                       tx_s.IntronAnnotSupportPercent,
                                       tx_s.IntronRnaSupportPercent,
                                       tx_s.TransMapGoodness,
                                       tx_s.AlnGoodness_mRNA])
consensus.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录