10x_barcode_profiling.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:Topsorter 作者: hanfang 项目源码 文件源码
def buildcov(self):
       """
          Builds Barcode counts of the reads aligned to the genome
          input : bam_file and bed_file
          output : Returns bed file with Chr, start, end, barcode, no of occurences, strand
       """
       before = datetime.now()
       barcode_pattern = re.compile(r'BX:(A-Z)?:[ACGT]*-[0-9]')
       self.__LOGGER.info("WARNING: Dropping Reads Alignments that are not barcoded")
       output_bed = open(self.barcode_bed,"wb") 
       with open(self.bed_file) as regions:
             with pysam.Samfile(self.obam_file, "rb") as samfile:
             for line in regions:
            chr, start, end , info = line.rstrip('\n').rstrip('\r').split("\t")
            b_dict = {}
            try:
                        readsinregion=samfile.fetch(chr,int(start),int(end))
                        for read in readsinregion:
                    tags = dict(read.tags)
                    if 'BX' in tags.keys(): 
                    if tags['BX'] in b_dict: b_dict[tags['BX']] += 1
                    else: b_dict[tags['BX']] = 1
                    except:
                        self.__LOGGER.info("Skipping region : {0}:{1}:{2}".format(chr,start,end))
                        continue 
                for k, v in b_dict.items():
                        output_bed.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(chr,start,end,info,k,v)) 

       regions.close()
       samfile.close()
       output_bed.close()
       after = datetime.now()
       self.__LOGGER.info('Computed Barcode profiles for the vcf regions completed in {0}'.format(str(after - before)))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号