peakcaller.bak2.py 文件源码

python
阅读 18 收藏 0 点赞 0 评论 0

项目:CLAM 作者: Xinglab 项目源码 文件源码
def peakcaller(tmp_dir, out_dir, gtf_fp, unique_only=False, with_replicates=False, with_control=False, unstranded=False):
    """DOCSTRING
    Args:
    Returns:
    """
    # file handlers
    mbam = pysam.Samfile(os.path.join(out_dir, 'realigned.sorted.bam'),'rb')
    ubam = pysam.Samfile(os.path.join(tmp_dir, 'unique.sorted.bam'),'rb')
    bam_dict = {'ubam.ip':[ubam], 'mbam.ip':[mbam]}
    if unique_only:
        ofile = open(os.path.join(out_dir, 'narrow_peaks.unique.bed'), 'w')
    else:
        ofile = open(os.path.join(out_dir, 'narrow_peaks.combined.bed'), 'w')

    # read in GTF
    logger.info('read gtf from "%s" '% fn)
    gene_annot = read_gtf(gtf_fp)

    # fetch tag counts in each gene
    ##  gene_name: { 'ip': bin_interval_ip, 'con': bin_interval_con }
    logger.info('reading gene counts')
    gene_count = defaultdict(dict)
    for gene_name in tqdm(gene_annot):
        gene = gene_annot[gene_name]
        bin_interval_ip, bin_interval_con = \
            gene_to_count(bam_dict, gene, 
                unique_only=unique_only, with_control=with_control, 
                unstranded=unstranded)
        if bin_interval_ip is None:  ## if no IP reads in this gene
            continue
        gene_count[gene_name]['ip'] = bin_interval_ip
        gene_count[gene_name]['con'] = bin_interval_con

    # estimate global overdispersion param. for each dataset
    logger.info('estimating dispersion parameter')
    alpha_ip_vec = estim_dispersion_param(len(bam_dict['ubam.ip']), gene_count, 'ip' )
    if with_control:
        alpha_con_vec = estim_dispersion_param(len(bam_dict['ubam.con']), gene_count, 'con' )
    else:
        alpha_con_vec = np.asarray(alpha_ip_vec)

    # perform statistical test
    logger.info('calling peaks')
    for gene_name in gene_to_count: 
        gene = gene_annot[gene_name]
        BED = test_gene_bin(gene, gene_count[gene_name]['ip'], gene_count[gene_name]['con'],
            alpha_ip_vec, alpha_con_vec)
        ofile.write(BED)
        peak_counter += len(BED.split('\n'))
    ofile.close()
    logger.info('called %i peaks'%peak_counter)

    return
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号