def buildcov(self):
"""
Builds Barcode counts of the reads aligned to the genome
input : bam_file and bed_file
output : Returns bed file with Chr, start, end, barcode, no of occurences, strand
"""
before = datetime.now()
barcode_pattern = re.compile(r'BX:(A-Z)?:[ACGT]*-[0-9]')
self.__LOGGER.info("WARNING: Dropping Reads Alignments that are not barcoded")
output_bed = open(self.barcode_bed,"wb")
with open(self.bed_file) as regions:
with pysam.Samfile(self.obam_file, "rb") as samfile:
for line in regions:
chr, start, end , info = line.rstrip('\n').rstrip('\r').split("\t")
b_dict = {}
try:
readsinregion=samfile.fetch(chr,int(start),int(end))
for read in readsinregion:
tags = dict(read.tags)
if 'BX' in tags.keys():
if tags['BX'] in b_dict: b_dict[tags['BX']] += 1
else: b_dict[tags['BX']] = 1
except:
self.__LOGGER.info("Skipping region : {0}:{1}:{2}".format(chr,start,end))
continue
for k, v in b_dict.items():
output_bed.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(chr,start,end,info,k,v))
regions.close()
samfile.close()
output_bed.close()
after = datetime.now()
self.__LOGGER.info('Computed Barcode profiles for the vcf regions completed in {0}'.format(str(after - before)))
评论列表
文章目录