def gfa2_parse_segments_with_fragments(f: TextIO):
"""Parse all segments and fragments from a GFA2 file and store them in a
dict.
In PHASM fragments are used to denote merged reads."""
segments = {}
fragments = defaultdict(list)
for line in f:
if not line.startswith('S') and not line.startswith('F'):
continue
parts = line.strip().split('\t')
line_type = parts[0].strip()
segment_name = parts[1].strip()
if line_type == 'S':
segments[segment_name] = line
if line_type == 'F':
fragments[segment_name].append(gfa2_parse_fragment(line))
reads = {}
for segment, line in segments.items():
if segment not in fragments:
reads[segment] = gfa2_segment_to_read(line)
else:
read = gfa2_segment_to_read(line)
length = len(read)
fragment_reads = []
prefix_lengths = []
for fragment_info in sorted(fragments[segment],
key=lambda elem: elem[2][0]):
_, external_id, segment_range, fragment_range = fragment_info
fragment_length = fragment_range[1] - fragment_range[0]
fragment_reads.append(external_id)
prefix_lengths.append(fragment_length)
prefix_lengths.pop()
reads[segment] = MergedFragment(read.id, length, fragment_reads,
prefix_lengths)
return reads
评论列表
文章目录