collect_only.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:LoReAn 作者: lfaino 项目源码 文件源码
def add_EVM(whole_fasta_name, output_filename, output_merged_fasta_name):
    """
    Append the EVM records that are not present in the final contig evidence.

    This module looks for genes that were not used in the consensus stage;
    usually these are gene models without long-read support.

    Every record of ``output_filename`` is kept, and every record of
    ``whole_fasta_name`` whose id contains ``'evm'`` and is not an id already
    seen in ``output_filename`` is added. All records are written to
    ``output_merged_fasta_name`` with a header of the form
    ``>Gene<counter>_<id>`` followed by the sequence on a single line.

    :param whole_fasta_name: path of the FASTA file holding all the records
        (ids must be unique, as required by ``SeqIO.to_dict``)
    :param output_filename: path of the FASTA file holding the records that
        were already used; duplicated ids are tolerated
    :param output_merged_fasta_name: path of the FASTA file to write
    :return: None
    """
    sys.stdout.write('\t###APPEND EVM NOT USED FROM CONTIGS BUILDING###\n')
    count = 0
    dictOut = {}
    # A single ``with`` guarantees that all three handles are closed, even
    # when parsing or writing raises. The original never closed the handle of
    # ``output_filename`` and called ``close()`` on the parser object instead.
    with open(whole_fasta_name, 'r') as whole_fasta, \
            open(output_filename, 'r') as out_fasta_file, \
            open(output_merged_fasta_name, 'w') as outputMerged:
        wholeDict = SeqIO.to_dict(SeqIO.parse(whole_fasta, 'fasta'))

        for record in SeqIO.parse(out_fasta_file, 'fasta'):
            if record.id in dictOut:
                # Repeated id: store it under a suffixed key so the earlier
                # sequence is not overwritten.
                dictOut[str(record.id) + '_' + str(count)] = str(record.seq)
                count += 1
            else:
                dictOut[record.id] = str(record.seq)

        # The counter deliberately continues from the value reached above, so
        # the numbering of the output headers is unchanged.
        # First the EVM-only records that are missing from the used set ...
        for key, record in wholeDict.items():
            if 'evm' in key and key not in dictOut:
                ident = '>Gene' + str(count) + '_' + key
                outputMerged.write(ident + '\n' + str(record.seq) + '\n')
                count += 1

        # ... then every record that was already used.
        for key, element in dictOut.items():
            ident = '>Gene' + str(count) + '_' + key
            outputMerged.write(ident + '\n' + str(element) + '\n')
            count += 1
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号