def load_fragments(options, sample, dataset, chrom=None, start=None, end=None, usecols=None,
                   min_reads_per_frag=1):
    """
    Load read-cloud fragments for one sample/dataset, optionally limited to a region.

    Rows are fetched from the tabix-indexed table at
    ``<options.results_dir>/CombineReadcloudsStep/readclouds.<sample.name>.<dataset.id>.tsv.gz``
    and parsed into a DataFrame whose column names come from ``Readcloud._fields``.

    Args:
        options: object exposing ``results_dir``.
        sample: object exposing ``name``.
        dataset: object exposing ``id``.
        chrom: contig name handed to ``TabixFile.fetch``; None means no contig
            restriction.
        start: region start handed to ``TabixFile.fetch``; must not be negative.
        end: region end handed to ``TabixFile.fetch``; must be greater than
            ``start`` when both are given.
        usecols: optional collection of column names to load. ``"num_reads"``
            is always loaded as well because the read-count filter needs it.
            The caller's collection is never modified.
        min_reads_per_frag: when greater than 0, only rows whose ``num_reads``
            is strictly greater than this value are kept.

    Returns:
        pandas.DataFrame with one row per fetched fragment. An empty frame is
        returned when ``chrom`` is not a contig of the index (a "MISSING:"
        line is printed as well) or when the region contains no rows.

    Raises:
        Exception: if ``start`` is negative, or if ``start >= end``.
    """
    if start is not None and start < 0:
        raise Exception("start coord is negative: {}:{}-{}".format(chrom, start, end))
    # Only compare the bounds when both are present; comparing None with an
    # int raises TypeError on Python 3.
    if start is not None and end is not None and start >= end:
        raise Exception("end coord is before start: {}:{}-{}".format(chrom, start, end))

    readclouds_path = os.path.join(
        options.results_dir,
        "CombineReadcloudsStep",
        "readclouds.{}.{}.tsv.gz".format(sample.name, dataset.id))

    if usecols is not None and "num_reads" not in usecols:
        # Build a new list instead of appending, so the caller's list is left alone.
        usecols = list(usecols) + ["num_reads"]

    tabix = pysam.TabixFile(readclouds_path)
    try:
        if chrom is not None and chrom not in tabix.contigs:
            print("MISSING:", chrom)
            return pandas.DataFrame(columns="chrom start_pos end_pos bc num_reads obs_len hap".split())
        # Materialize the rows now: the iterator cannot be read after close().
        lines = list(tabix.fetch(chrom, start, end))
    finally:
        tabix.close()

    if not lines:
        # read_table raises on an empty buffer, so build the empty result
        # directly, using the same column selection the parser would apply.
        columns = [field for field in Readcloud._fields
                   if usecols is None or field in usecols]
        return pandas.DataFrame(columns=columns)

    s = StringIO.StringIO("\n".join(lines))
    readclouds = pandas.read_table(s, header=None, names=Readcloud._fields, usecols=usecols)

    # "chrom" is absent when the caller's usecols leaves it out.
    if "chrom" in readclouds.columns:
        readclouds["chrom"] = readclouds["chrom"].astype("string")

    if min_reads_per_frag > 0:
        # NOTE(review): the comparison is strict, so with the default of 1 a
        # fragment needs at least 2 reads to be kept. The parameter name
        # suggests an inclusive minimum -- confirm the intent before changing.
        readclouds = readclouds.loc[readclouds["num_reads"] > min_reads_per_frag]

    return readclouds
# (Web-page residue, not part of the code: "Comment list" / "Article table of contents".)