def add_EVM(whole_fasta_name, output_filename, output_merged_fasta_name):
    """
    Append the EVM gene models that were not used while building contigs.

    Looks for records in the complete FASTA that did not make it into the
    consensus output (per the original author's note, these are usually gene
    models without long-read support) and writes them, together with every
    consensus record, to a single merged FASTA file.

    Parameters:
        whole_fasta_name: path of the FASTA file holding all gene models.
        output_filename: path of the FASTA file produced by the consensus
            stage.
        output_merged_fasta_name: path of the merged FASTA file to write.

    Returns:
        None. The result is the file written at ``output_merged_fasta_name``.

    Every output header has the form ``>Gene<N>_<original id>``. The merged
    file is opened only after both inputs have been read completely, so a
    failure while parsing does not leave a truncated output behind.
    """
    sys.stdout.write('\t###APPEND EVM NOT USED FROM CONTIGS BUILDING###\n')

    # Index every record of the complete FASTA by id.
    with open(whole_fasta_name, 'r') as whole_fasta:
        whole_records = SeqIO.to_dict(SeqIO.parse(whole_fasta, 'fasta'))

    # ``count`` first numbers the repeated ids found in the consensus file and
    # then keeps running as the gene number used in the output headers.
    count = 0
    consensus_seqs = {}
    with open(output_filename, 'r') as out_fasta_file:
        for record in SeqIO.parse(out_fasta_file, 'fasta'):
            if record.id in consensus_seqs:
                # Repeated id: store it under a suffixed key instead of
                # overwriting the sequence already collected.
                suffixed_id = str(record.id) + '_' + str(count)
                consensus_seqs[suffixed_id] = str(record.seq)
                count += 1
            else:
                consensus_seqs[record.id] = str(record.seq)

    with open(output_merged_fasta_name, 'w') as merged:
        # First the EVM records that are missing from the consensus output...
        for key, record in whole_records.items():
            if 'evm' in key and key not in consensus_seqs:
                merged.write(
                    '>Gene' + str(count) + '_' + key + '\n'
                    + str(record.seq) + '\n')
                count += 1
        # ...then every record of the consensus output itself.
        for key, sequence in consensus_seqs.items():
            merged.write(
                '>Gene' + str(count) + '_' + key + '\n' + sequence + '\n')
            count += 1
# (Web-page navigation residue, not part of the program: "comment list" / "table of contents".)