# consensus.py source file - Python code snippet

def calculate_improvement_metrics(final_consensus, scored_df, tm_eval_df, hgm_df, metrics):
    """For coding transcripts, how much did we improve the metrics?

    Builds a per-alignment lookup table that places the transMap-derived
    values next to the values of each scored alignment, then records, for
    every protein-coding consensus entry that is not transMap-derived, the
    before/after values.

    :param final_consensus: iterable of ``(aln_id, c)`` pairs, where ``c`` is a
        mapping with at least the keys ``'transcript_biotype'`` and
        ``'transcript_modes'``.
    :param scored_df: DataFrame which, after ``reset_index()``, provides
        ``AlignmentId``, ``TranscriptId``, ``IntronAnnotSupportPercent``,
        ``IntronRnaSupportPercent``, ``OriginalIntronsPercent_mRNA``,
        ``TransMapGoodness`` and ``AlnGoodness_mRNA``.
    :param tm_eval_df: DataFrame which, after ``reset_index()``, provides
        ``TransMapOriginalIntronsPercent`` and ``TranscriptId``.
    :param hgm_df: DataFrame with ``AlignmentId``, ``TranscriptId``,
        ``IntronAnnotSupportPercent`` and ``IntronRnaSupportPercent`` columns.
    :param metrics: dict that is modified in place; the key
        ``'Evaluation Improvement'`` is (re)set to
        ``{'changes': [...], 'unchanged': <int>}``.
    :return: None. Results are written into ``metrics``.
    :raises KeyError: if a non-transMap protein-coding ``aln_id`` is not
        present in the merged table.

    Each entry appended to ``changes`` is an 8-item list, in this order:
    ``TransMapOriginalIntronsPercent``, ``IntronAnnotSupportPercentTransMap``,
    ``IntronRnaSupportPercentTransMap``, ``OriginalIntronsPercent_mRNA``,
    ``IntronAnnotSupportPercent``, ``IntronRnaSupportPercent``,
    ``TransMapGoodness``, ``AlnGoodness_mRNA``.
    """
    tm_df = tm_eval_df.reset_index()[['TransMapOriginalIntronsPercent', 'TranscriptId']]
    # Keep only the homGeneMapping rows that belong to transMap alignments.
    is_transmap = hgm_df['AlignmentId'].apply(tools.nameConversions.aln_id_is_transmap)
    hgm_df_subset = hgm_df[is_transmap]
    hgm_df_subset = hgm_df_subset[['TranscriptId', 'IntronAnnotSupportPercent', 'IntronRnaSupportPercent']]
    tm_df = pd.merge(tm_df, hgm_df_subset, on='TranscriptId')
    # Columns present on both sides get the 'TransMap' suffix on the left
    # (transMap) side and keep their plain name on the right (scored) side.
    df = pd.merge(tm_df, scored_df.reset_index(), on='TranscriptId', suffixes=('TransMap', ''))
    # The merges on TranscriptId can yield several rows per AlignmentId
    # (presumably when a transcript has more than one transMap row -- TODO
    # confirm); keep the first so the index below is unique.
    df = df.drop_duplicates(subset='AlignmentId')
    df = df.set_index('AlignmentId')
    improvement = {'changes': [], 'unchanged': 0}
    metrics['Evaluation Improvement'] = improvement
    for aln_id, c in final_consensus:
        if c['transcript_biotype'] != 'protein_coding':
            continue
        if 'transMap' in c['transcript_modes']:
            improvement['unchanged'] += 1
            continue
        # Label-based lookup; ``DataFrame.ix`` was removed in pandas 1.0.
        tx_s = df.loc[aln_id]
        improvement['changes'].append([tx_s.TransMapOriginalIntronsPercent,
                                       tx_s.IntronAnnotSupportPercentTransMap,
                                       tx_s.IntronRnaSupportPercentTransMap,
                                       tx_s.OriginalIntronsPercent_mRNA,
                                       tx_s.IntronAnnotSupportPercent,
                                       tx_s.IntronRnaSupportPercent,
                                       tx_s.TransMapGoodness,
                                       tx_s.AlnGoodness_mRNA])