preprocessor.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:CLAM 作者: Xinglab 项目源码 文件源码
def filter_bam_maxtags(obam_fn, ibam_fn, max_tags=1):
    """DOCSTRING
    Args
    Returns
    """
    assert max_tags>0
    # prepare files
    ibam = pysam.Samfile(ibam_fn, 'rb')
    obam = pysam.Samfile(obam_fn, 'wb', template=ibam)
    # init 
    collapse_dict = defaultdict(list)
    chr_list=[x['SN'] for x in ibam.header['SQ']]
    input_counter = 0
    output_counter = 0

    for chr in chr_list:
        # empty stack for each new chromosome
        stack = []
        last_pos = -1
        for read in ibam.fetch(chr):
            input_counter += 1
            if not (input_counter % (5*(10**6)) ):
                logger.debug('collapsed %i alignments'%input_counter)
            if read.positions[0] > last_pos:
                new_alignment_list, collapse_dict = collapse_stack(stack, collapse_dict, max_tags)
                output_counter += len(new_alignment_list)
                last_pos = read.positions[0]
                stack = [read]
                for new_alignment in new_alignment_list:
                    new_alignment.query_sequence = '*'
                    new_alignment.query_qualities = '0'
                    _ = obam.write(new_alignment)
            else:
                stack.append(read)
        new_alignment_list, collapse_dict = collapse_stack(stack, collapse_dict, max_tags)
        output_counter += len(new_alignment_list)
        last_pos = read.positions[0]
        for new_alignment in new_alignment_list:
            new_alignment.query_sequence = '*'
            new_alignment.query_qualities = '0'
            _ = obam.write(new_alignment)
    ibam.close()
    obam.close()
    #os.rename(obam_fn, ibam_fn)
    #pysam.sort(obam_fn)
    pysam.index(obam_fn)
    logger.info('Input = %s; Output = %s; Redundancy = %.2f'%(input_counter,output_counter, 1-float(output_counter)/input_counter))
    return
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号