def cigar_parse(self, tuples):
    """
    Convert pysam CIGAR tuples into the internal (length, op-char) format.

    arguments:
    <tuples> a CIGAR tuple list in pysam format, i.e. an iterable of
             (operation_code, length) pairs such as [(0, 20), (1, 5)].
             May be None (pysam reports None for reads that carry no
             CIGAR, e.g. unmapped reads), in which case an empty list
             is returned.

    purpose:
    This function uses the pysam cigarstring tuples format and returns
    a list of tuples in the internal format, [(20, 'M'), (5, 'I')], et
    cetera. Note that the element order is swapped relative to pysam:
    the zeroth element of each returned tuple is the number of bases for
    the CIGAR string feature, and the first element is the CIGAR string
    feature type.

    There are several feature types in SAM/BAM files. See below:
    'M' - match
    'I' - insertion relative to reference
    'D' - deletion relative to reference
    'N' - skipped region from the reference
    'S' - soft clip, not aligned but still in sam file
    'H' - hard clip, not aligned and not in sam file
    'P' - padding (silent deletion from padded reference)
    '=' - sequence match
    'X' - sequence mismatch
    'B' - BAM_CBACK (a "back" operation listed by pysam; it does not
          appear to be one of the standard SAM CIGAR operators and is
          rarely encountered -- NOTE(review): confirm against htslib)

    returns:
    A list of (length, op_char) tuples, in the same order as the input.

    raises:
    KeyError if an operation code is not one of the integers 0-9.
    """
    # Reads without an alignment have no CIGAR; pysam hands back None for
    # those, so treat it as "no features" rather than failing on iteration.
    if tuples is None:
        return []

    # I used the map values from http://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment
    psam_to_char = {0: 'M', 1: 'I', 2: 'D', 3: 'N', 4: 'S',
                    5: 'H', 6: 'P', 7: '=', 8: 'X', 9: 'B'}
    # pysam order is (op, length); the internal format is (length, op).
    return [(length, psam_to_char[op]) for op, length in tuples]
评论列表
文章目录