def assignReadRefAlt(x, bam):
'''
Given a VCF record and a bam, return the read names that support either the REF or ALT alleles
:param x: VCF record python object
:param bam: BAM file handle
:return: dictionary containing two lists of strings, ref/alt.
'''
iter = input_bam.pileup(x.contig, x.start,
x.stop) # Iterable mpileup, covering a much wider genomic range, from start of left-most read
output_dict = {'ref': None, 'alt': None, 'nomatch': 0, 'coverage': None} # Initialise output object
for pileupcolumn in iter:
ref_reads = []
alt_reads = []
if pileupcolumn.pos == x.start:
for pileupread in pileupcolumn.pileups:
if not pileupread.is_del and not pileupread.is_refskip:
a_read = pileupread.alignment # pysam.AlignedSegment
base_at_pos = a_read.query_sequence[pileupread.query_position]
#print base_at_pos, x.alleles, x.info["AF"]
if base_at_pos == x.ref[0]:
ref_reads.append(a_read.query_name) # Read matches ref at pos
elif base_at_pos == x.alts[0]:
alt_reads.append(a_read.query_name) # Read matches alt at pos
else:
output_dict['nomatch'] += 1 ## Allele matches neither ref/alt
output_dict['ref'] = ref_reads
output_dict['alt'] = alt_reads
output_dict['coverage'] = float(pileupcolumn.n)
return output_dict
评论列表
文章目录