gfa.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:phasm 作者: AbeelLab 项目源码 文件源码
def gfa2_parse_segments_with_fragments(f: TextIO):
    """Parse all segments and fragments from a GFA2 file and store them in a
    dict.

    In PHASM fragments are used to denote merged reads."""

    segments = {}
    fragments = defaultdict(list)
    for line in f:
        if not line.startswith('S') and not line.startswith('F'):
            continue

        parts = line.strip().split('\t')

        line_type = parts[0].strip()
        segment_name = parts[1].strip()
        if line_type == 'S':
            segments[segment_name] = line

        if line_type == 'F':
            fragments[segment_name].append(gfa2_parse_fragment(line))

    reads = {}
    for segment, line in segments.items():
        if segment not in fragments:
            reads[segment] = gfa2_segment_to_read(line)
        else:
            read = gfa2_segment_to_read(line)
            length = len(read)
            fragment_reads = []
            prefix_lengths = []

            for fragment_info in sorted(fragments[segment],
                                        key=lambda elem: elem[2][0]):
                _, external_id, segment_range, fragment_range = fragment_info
                fragment_length = fragment_range[1] - fragment_range[0]
                fragment_reads.append(external_id)
                prefix_lengths.append(fragment_length)

            prefix_lengths.pop()
            reads[segment] = MergedFragment(read.id, length, fragment_reads,
                                            prefix_lengths)

    return reads
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号