# call_readclouds.py -- Python source snippet

def load_fragments(options, sample, dataset, chrom=None, start=None, end=None, usecols=None, 
                   min_reads_per_frag=1):
    """Load read clouds for one sample/dataset from the tabix-indexed table.

    The table is read from
    ``<options.results_dir>/CombineReadcloudsStep/readclouds.<sample.name>.<dataset.id>.tsv.gz``
    and, when given, restricted to the region ``chrom:start-end``.

    Args:
        options: object exposing ``results_dir``.
        sample: object exposing ``name``.
        dataset: object exposing ``id``.
        chrom: contig to fetch, or None to pass no contig to the tabix fetch.
        start: region start, or None for an open start; must not be negative.
        end: region end, or None for an open end; must be greater than
            ``start`` when both are given.
        usecols: optional columns to load. ``"num_reads"`` is always added
            because the read-count filter needs it; the caller's sequence is
            left untouched.
        min_reads_per_frag: when greater than 0, only rows whose
            ``num_reads`` is strictly greater than this value are kept.

    Returns:
        A pandas.DataFrame of read clouds. If ``chrom`` is not one of the
        contigs in the tabix index, an empty frame with the standard columns
        is returned.

    Raises:
        Exception: if ``start`` is negative, or ``start >= end``.
    """
    if start is not None and start < 0:
        raise Exception("start coord is negative: {}:{}-{}".format(chrom, start, end))
    # Only compare the two bounds when both were supplied; comparing None with
    # an int is an error on Python 3 and meaningless on Python 2.
    if start is not None and end is not None and start >= end:
        raise Exception("end coord is before start: {}:{}-{}".format(chrom, start, end))

    readclouds_path = os.path.join(
        options.results_dir,
        "CombineReadcloudsStep",
        "readclouds.{}.{}.tsv.gz".format(sample.name, dataset.id))

    # num_reads is required by the filter below. Build a new list instead of
    # appending so the caller's usecols argument is never modified.
    if usecols is not None and "num_reads" not in usecols:
        usecols = list(usecols) + ["num_reads"]

    tabix = pysam.TabixFile(readclouds_path)
    try:
        if chrom is not None and chrom not in tabix.contigs:
            print("MISSING:", chrom)
            return pandas.DataFrame(columns="chrom start_pos end_pos bc num_reads obs_len hap".split())

        # Materialise the fetched rows while the file is still open.
        s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))
    finally:
        # Always release the file handle, including on the early return above.
        tabix.close()

    readclouds = pandas.read_table(s, header=None, names=Readcloud._fields, usecols=usecols)
    readclouds["chrom"] = readclouds["chrom"].astype("string")

    if min_reads_per_frag > 0:
        # NOTE: the comparison is strict, so rows with exactly
        # min_reads_per_frag reads are dropped.
        readclouds = readclouds.loc[readclouds["num_reads"]>min_reads_per_frag]

    return readclouds