def parse_fusion_bam(bam_f):
fusions = {}
bam = pysam.AlignmentFile(bam_f, 'rb')
for read in bam:
if read.is_secondary: # not the primary alignment
continue
if not read.has_tag('XF'): # not fusion junctions
continue
chr1, chr2 = read.get_tag('XF').split()[1].split('-')
if chr1 != chr2: # not on the same chromosome
continue
strand = '+' if not read.is_reverse else '-'
if read.query_name not in fusions: # first fragment
fusions[read.query_name] = [chr1, strand, read.reference_start,
read.reference_end]
else: # second fragment
if chr1 == fusions[read.query_name][0] \
and strand == fusions[read.query_name][1]:
yield [chr1, strand, read.reference_start, read.reference_end]
yield fusions[read.query_name]
bam.close()
评论列表
文章目录