realigner.bak.py 文件源码

python
阅读 18 收藏 0 点赞 0 评论 0

项目:CLAM 作者: Xinglab 项目源码 文件源码
def realigner(out_dir, tmp_dir, winsize=50, unstranded=False):
    """DOCSTRING
    Args:
    Returns:
    """
    # file handlers
    mbam = pysam.Samfile(os.path.join(tmp_dir, 'multi.sorted.bam'),'rb')
    ubam = pysam.Samfile(os.path.join(tmp_dir, 'unique.sorted.bam'),'rb')
    obam = pysam.Samfile(os.path.join(out_dir, 'realigned.bam'), 'wb', template = mbam)
    chr_list=[x['SN'] for x in ubam.header['SQ']]

    # construct the mread_dict; this will be needed throughout
    mread_dict = defaultdict(list)
    for alignment in mbam:
        mread_dict[alignment.qname].append(alignment)

    # keep a record of processed reads
    processed_mreads = set()

    # iterate through all mreads
    for read_qname in mread_dict:
        if read_qname in processed_mreads:
            continue

        ## construct the fully-connected subgraph for each read
        read_to_locations, processed_mreads = \
            construct_subgraph(mbam, read_qname, mread_dict, processed_mreads, chr_list, winsize=winsize, unstranded=unstranded)
        subgraph = set()
        for read in read_to_locations:
            _ = map(subgraph.add, read_to_locations[read].keys())
        subgraph = list(subgraph)

        ## build the BIT tracks
        node_track, multi_reads_weights = \
            construct_BIT_track(subgraph, read_to_locations, ubam, unstranded)

        ## run EM
        multi_reads_weights = \
            run_EM(node_track, multi_reads_weights, w=winsize)

        ## write to obam
        for read in multi_reads_weights:
            for node in multi_reads_weights[read]:
                alignment = read_to_locations[read][node]
                score = round(multi_reads_weights[read][node][0], 3)
                alignment.set_tag('AS', score)
                alignment.set_tag('PG', 'CLAM')
                obam.write(alignment)
    # sort the final output
    logger.info('sorting output')
    obam.close()
    ubam.close()
    mbam.close()
    obam_sorted_fn = os.path.join(out_dir, 'realigned.sorted.bam')
    pysam.sort('-o', obam_sorted_fn, os.path.join(out_dir, 'realigned.bam'))
    pysam.index(obam_sorted_fn)
    os.remove(os.path.join(out_dir, 'realigned.bam'))
    return
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号