fast5_to_fastq.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:Fast5-to-Fastq 作者: rrwick 项目源码 文件源码
def get_best_fastq_hdf5_location(hdf5_file, names):
    """
    Choose which FASTQ entry inside a FAST5 (HDF5) file should be used for a read.

    Candidates are the entries of `names` whose upper-cased form ends with 'FASTQ'. They are
    sorted, and within each category the last one in sorted order is preferred (the hope being
    that it comes from the most recent basecalling run).

    Priority order:
      1. a 2D basecall ('BASECALLED_2D' in the path);
      2. when both a template and a complement basecall exist, whichever has the higher value
         from get_mean_score (the template wins ties);
      3. the template alone, or the complement alone;
      4. any other FASTQ entry;
      5. None when no FASTQ entry exists at all.

    `hdf5_file` is only passed through to get_mean_score; `names` is iterated exactly once.
    """
    fastq_paths = [name for name in names if name.upper().endswith('FASTQ')]
    fastq_paths.sort()

    def latest_containing(keyword):
        # Last (in sorted order) FASTQ path whose upper-cased form contains the keyword.
        matches = [path for path in fastq_paths if keyword in path.upper()]
        return matches[-1] if matches else None

    # 2D basecalling trumps everything else.
    two_d = latest_containing('BASECALLED_2D')
    if two_d is not None:
        return two_d

    template = latest_containing('TEMPLATE')
    complement = latest_containing('COMPLEMENT')

    # Both strands were basecalled separately: keep the one with the better mean score.
    if template is not None and complement is not None:
        template_score = get_mean_score(hdf5_file, template)
        complement_score = get_mean_score(hdf5_file, complement)
        return template if template_score >= complement_score else complement

    # Only one strand is available (template-only is the normal 1D case).
    if template is not None:
        return template
    if complement is not None:
        return complement

    # A FASTQ that is neither 2D, template nor complement is unusual, but it is still treated
    # as a 1D read; with no FASTQ at all there is nothing to return.
    return fastq_paths[-1] if fastq_paths else None
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号