def recover_unmapped_mut_info(mut_info, bed, sc, opts):
    """Recover info for mutations lacking a reference-transcript position.

    Mutations whose 'Coding Position' is null are re-derived from the
    annotated protein effect and genomic coordinates, using the genome
    FASTA named in ``opts['genome']``.

    Parameters
    ----------
    mut_info : DataFrame-like
        Must provide the 'Coding Position', 'Tumor_Sample' and
        'Tumor_Type' columns.
    bed : object
        Gene annotation; only its ``strand`` and ``chrom`` attributes
        are read here.
    sc : object
        Provides ``is_valid_context(context)``, used to drop entries
        whose nucleotide context is not valid.
    opts : dict
        Options. Recovery only runs when both 'use_unmapped' and
        'genome' are present and truthy; 'context' is then required.

    Returns
    -------
    dict
        Mapping of column name to values. When recovery is disabled
        (or the options are missing) every column is an empty list.
    """
    # Use .get() so that absent options simply disable recovery instead of
    # raising KeyError before the fallback below can be reached.
    if not (opts.get('use_unmapped') and opts.get('genome')):
        return {'Context': [], 'Reference AA': [], 'Codon Pos': [],
                'Somatic AA': [], 'Tumor_Allele': [],
                'Tumor_Sample': [], 'Tumor_Type': []}

    # try to still use mutations that are not on the reference transcript
    tmp_mut_info = mut_info[mut_info['Coding Position'].isnull()]

    genome_fa = pysam.Fastafile(opts['genome'])
    try:
        unmapped_mut_info = get_unmapped_aa_mut_info(tmp_mut_info,
                                                     genome_fa,
                                                     bed.strand,
                                                     bed.chrom,
                                                     opts['context'])
    finally:
        # always release the FASTA handle, even if the lookup fails
        genome_fa.close()

    # fill in tumor sample/tumor type info
    unmapped_mut_info['Tumor_Sample'] = tmp_mut_info['Tumor_Sample'].tolist()
    unmapped_mut_info['Tumor_Type'] = tmp_mut_info['Tumor_Type'].tolist()

    # filter out cases where the nucleotide context does not exist
    # on the reference transcript
    bad_contexts = [i for i, context in enumerate(unmapped_mut_info['Context'])
                    if not sc.is_valid_context(context)]
    # NOTE(review): utils.filter_list presumably drops the entries at the
    # given indices -- confirm against its definition.
    for key in unmapped_mut_info:
        unmapped_mut_info[key] = utils.filter_list(unmapped_mut_info[key],
                                                   bad_contexts)
    return unmapped_mut_info
评论列表
文章目录