def get_full_alignment_base_quality_scores(read):
    """
    Returns base quality scores laid out along the read alignment, inserting zeroes for deletions
    and removing inserted and soft-clipped bases. Therefore, only returns quality for truly aligned
    sequenced bases (plus zero placeholders for deleted positions).

    Args:
        read (pysam.AlignedSegment): read to get quality scores for. Its ``qual`` attribute is
            decoded by subtracting ``tk_constants.ILLUMINA_QUAL_OFFSET`` from each character code,
            and its ``cigar`` attribute is iterated as (operation, length) pairs.

    Returns:
        tuple: ``(start_pos, quality_scores)`` where ``start_pos`` (int) is the position reached
            after walking every CIGAR operation and ``quality_scores`` is a numpy array of the
            adjusted quality scores.
    """
    encoded_quals = read.qual
    if not isinstance(encoded_quals, bytes):
        # np.frombuffer needs a bytes-like object; the quality string is plain ASCII text.
        encoded_quals = encoded_quals.encode('ascii')

    # np.frombuffer replaces the deprecated binary mode of np.fromstring. The subtraction
    # allocates a fresh (writable) array, so the read-only buffer view is never modified.
    quality_scores = np.frombuffer(encoded_quals, dtype=np.byte) - tk_constants.ILLUMINA_QUAL_OFFSET

    start_pos = 0
    for operation, length in read.cigar:
        operation = cr_constants.cigar_numeric_to_category_map[operation]

        if operation == 'D':
            # Deleted bases have no sequenced quality: pad with zeroes and step past them.
            quality_scores = np.insert(quality_scores, start_pos, [0] * length)
            start_pos += length
        elif operation == 'I' or operation == 'S':
            # Inserted / soft-clipped bases are dropped; the position does not move because
            # the following bases slide into the vacated slots.
            quality_scores = np.delete(quality_scores, np.s_[start_pos:start_pos + length])
        elif operation == 'H' or operation == 'P':
            # Hard-clipped and padding operations have no bases in the quality string, so
            # advancing here would misalign every later insert/delete.
            continue
        else:
            # Aligned bases: keep their qualities and step past them.
            # NOTE(review): any other category (e.g. a reference skip 'N') also lands here and
            # advances without padding, exactly as before -- confirm whether such reads can
            # reach this function and how they should be represented.
            start_pos += length

    return start_pos, quality_scores
# (Web-page navigation residue, not part of the code: "Comment list" / "Article table of contents".)