def generate_pileup_chunks(bams, ref_fasta, ref_name, start=0, end=None,
chunk_len=50000, overlap=1000):
"""Yield chunks of pileup(s).
:param bams: iterable of input .bam files.
:param ref_fasta: input .fasta file.
:param ref_name: name of reference within .bam to parse.
:param start: start reference coordinate.
:param end: end reference coordinate. If `None` the full extent of
the reference will be parsed.
:param chunk_len: int, chunk length in reference coordinates.
:param overlap: int, overlap of adjacent chunks in reference coordinates.
:yields: (tuple `Pileup` objects, None)
..note:: the None in the yielded values is for compatibility with an
old API and will be removed.
"""
tview = MultiTView(bams, ref_fasta, columns=chunk_len)
if end is None:
align_fhs = (pysam.AlignmentFile(bam) for bam in bams)
end = min([dict(zip(p.references, p.lengths))[ref_name] for p in align_fhs])
logger.info("Chunking {}: {}-{} in chunks of {} overlapping by {}".format(ref_name, start, end, chunk_len, overlap))
#TODO: we could change how this is done since the class could cache
# everything in the entire region of interest.
for chunk_start, chunk_end in segment_limits(start, end, segment_len=chunk_len, overlap_len=overlap):
try:
pileups, ref_seq = tview.pileup(ref_name, chunk_start, chunk_end)
except TViewException:
logger.info("Skipping region {}-{} as TView did not find any reads".format(chunk_start, chunk_end))
else:
yield LabelledPileup(pileups=pileups, labels=None, ref_seq=ref_seq)
评论列表
文章目录