CLAM.lite_aligner.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:CLAM 作者: Xinglab 项目源码 文件源码
def filter_multihits(filename, max_hits, verbose, tmp_dir):
    """
    Pre-processing function for cleaning up the input bam file.
    """
    if verbose:
        print_time_stamp('filtering multi-mapped up to %d hits.' % max_hits)
    multiread_set=set()
    subprocess.call("samtools view -h %s | awk '{if($2 !~ /_/ && $3 !~ /_/) {print}}' | samtools view -bS - > %s/filter_random.bam" % (filename, tmp_dir), shell=True)
    oldfile=pysam.Samfile(tmp_dir + '/filter_random.bam','rb')
    new_filename=os.path.abspath(tmp_dir + '/filter%d.bam' % max_hits)
    sorted_filename=os.path.abspath(tmp_dir + '/filter%d.sorted.bam' % max_hits)
    newfile=pysam.Samfile(new_filename, 'wb', template=oldfile)
    for aligned_read in oldfile:
        try:
            if aligned_read.opt("NH") < max_hits:
                newfile.write(aligned_read)
                if aligned_read.opt("NH")>1:
                    multiread_set.add(aligned_read.qname)
        except KeyError:
            newfile.write(aligned_read)
    oldfile.close()
    newfile.close()
    sort_cmd='samtools sort -T %s/ -o %s %s' % (tmp_dir, sorted_filename, new_filename)
    index_cmd='samtools index %s' % sorted_filename
    subprocess.call(sort_cmd, shell=True)
    subprocess.call(index_cmd, shell=True)
    subprocess.call('rm %s/filter_random.bam %s' % (tmp_dir, new_filename), shell=True)
    pickle.dump(multiread_set, open(tmp_dir + '/multiread_set.pdata', 'wb'), -1)
    return(sorted_filename, multiread_set)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号