call_readclouds.py 文件源码

python
阅读 18 收藏 0 点赞 0 评论 0

项目:grocsvs 作者: grocsvs 项目源码 文件源码
def load_fragments(options, sample, dataset, chrom=None, start=None, end=None, usecols=None, 
                   min_reads_per_frag=1):
    """Load read-cloud fragments for one sample/dataset from a tabix-indexed file.

    The file read is
    ``<options.results_dir>/CombineReadcloudsStep/readclouds.<sample.name>.<dataset.id>.tsv.gz``.

    Args:
        options: object exposing ``results_dir``.
        sample: object exposing ``name``.
        dataset: object exposing ``id``.
        chrom: contig to query, or None to pass no contig restriction to
            ``tabix.fetch``.
        start: optional region start; must not be negative.
        end: optional region end; when ``start`` is also given, must be
            greater than ``start``.
        usecols: optional list of column names to load. ``"num_reads"`` is
            always loaded in addition, because the filter below needs it.
            The caller's list is not modified.
        min_reads_per_frag: when greater than 0, only rows whose
            ``num_reads`` is strictly greater than this value are kept.

    Returns:
        A pandas DataFrame whose columns are named from ``Readcloud._fields``
        (restricted by ``usecols``). If ``chrom`` is given but is not a contig
        of the tabix file, an empty DataFrame with the columns
        ``chrom start_pos end_pos bc num_reads obs_len hap`` is returned.

    Raises:
        Exception: if ``start`` is negative, or ``start >= end``.
    """
    if start is not None and start < 0:
        raise Exception("start coord is negative: {}:{}-{}".format(chrom, start, end))
    # Only compare the bounds when both are present; comparing None with an
    # int is a TypeError on Python 3 and meaningless on Python 2.
    if start is not None and end is not None and start >= end:
        raise Exception("end coord is before start: {}:{}-{}".format(chrom, start, end))

    readclouds_path = os.path.join(
        options.results_dir,
        "CombineReadcloudsStep",
        "readclouds.{}.{}.tsv.gz".format(sample.name, dataset.id))

    tabix = pysam.TabixFile(readclouds_path)
    try:
        if chrom is not None and chrom not in tabix.contigs:
            print("MISSING:", chrom)
            return pandas.DataFrame(columns="chrom start_pos end_pos bc num_reads obs_len hap".split())

        # Fully consume the fetch iterator before the file handle is closed.
        fetched_text = "\n".join(tabix.fetch(chrom, start, end))
    finally:
        # Release the file handle on every exit path, including the early return.
        tabix.close()

    if usecols is not None and "num_reads" not in usecols:
        # Build a new list rather than appending, so the caller's list is
        # left untouched.
        usecols = list(usecols) + ["num_reads"]

    s = StringIO.StringIO(fetched_text)
    readclouds = pandas.read_table(s, header=None, names=Readcloud._fields, usecols=usecols)
    readclouds["chrom"] = readclouds["chrom"].astype("string")

    if min_reads_per_frag > 0:
        # NOTE(review): the strict ">" drops fragments with exactly
        # min_reads_per_frag reads; confirm whether ">=" was intended.
        # Behaviour is kept as-is here.
        readclouds = readclouds.loc[readclouds["num_reads"]>min_reads_per_frag]

    return readclouds
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号