def bam_to_alignments(truth_bam, ref_name, start=None, end=None):
    """Build a start-sorted list of TruthAlignment objects from a truth bam.

    Records are fetched from the bam for the requested reference/interval;
    every record that is neither unmapped nor secondary is wrapped in a
    `TruthAlignment`.

    :param truth_bam: (sorted indexed) bam with true sequence aligned to
        reference.
    :param ref_name: name of reference to process.
    :param start: starting position within reference (forwarded to `fetch`;
        `None` by default).
    :param end: ending position within reference (forwarded to `fetch`;
        `None` by default).
        (all alignments with any overlap with the interval start:end will be
        retrieved)

    :returns: list of `TruthAlignment` objects ordered by their `start`
        attribute.
    """
    with pysam.AlignmentFile(truth_bam, 'rb') as bam:
        kept = []
        for record in bam.fetch(reference=ref_name, start=start, end=end):
            # Skip records that carry no usable primary alignment.
            if record.is_unmapped or record.is_secondary:
                continue
            kept.append(TruthAlignment(record))
        # Sort while the file is still open, in case `start` reads from the
        # underlying record.
        ordered = sorted(kept, key=attrgetter('start'))
    return ordered
评论列表
文章目录