def get_fasta_in_kraken_format(outfile_fasta='sequences.fa'):
    """Write the sequences of all ``.gbff`` files in ``cwd`` as Kraken-tagged FASTA.

    Every file in the directory ``cwd`` whose name ends in ``.gbff`` is parsed
    as GenBank.  For each record, the taxon ID is taken from the ``db_xref``
    qualifier of its ``source`` feature and one FASTA entry is written with
    the header ``><record id>|kraken:taxid|<taxon id>``.

    NOTE(review): ``cwd``, ``os`` and ``SeqIO`` are module-level names defined
    outside this block; ``cwd`` is presumably the working directory -- confirm.

    Args:
        outfile_fasta: Path of the FASTA file to create (overwritten if it
            already exists).

    Returns:
        None.

    Side effects:
        Each ``.gbff`` file is DELETED after it has been processed.
        The qualifiers of each inspected source feature and each resolved
        taxon tag are printed to stdout.
    """
    taxon_prefix = 'taxon:'

    # The context manager guarantees the output is flushed and closed even if
    # parsing one of the GenBank files raises.
    with open(outfile_fasta, 'w') as output:
        for file_name in os.listdir(cwd):
            if not file_name.endswith('.gbff'):
                continue

            # os.listdir() returns bare names; anchor them to the directory
            # that was listed so parsing/removal hit the intended file.
            gbff_path = os.path.join(cwd, file_name)

            for seq_record in SeqIO.parse(gbff_path, "genbank"):
                # Reset per record so a record lacking a taxon can never
                # inherit the tag of the previous record.
                taxid = None
                for feature in seq_record.features:
                    if 'source' not in feature.type:
                        continue
                    print(feature.qualifiers)
                    # A source feature may carry several db_xref entries;
                    # only the "taxon:<id>" one is relevant.  Joining them
                    # all would append unrelated references to the ID.
                    for xref in feature.qualifiers.get('db_xref', []):
                        if xref.startswith(taxon_prefix):
                            taxid = 'kraken:taxid|' + xref[len(taxon_prefix):]
                            break
                    if taxid is not None:
                        # First source feature with a taxon wins; this keeps
                        # exactly one FASTA entry per GenBank record.
                        break

                if taxid is None:
                    # Without a taxon ID the entry cannot be tagged, so it is
                    # skipped rather than written with a wrong or missing tag.
                    print("No taxon db_xref found for " + seq_record.id
                          + "; record skipped")
                    continue

                print(taxid)
                output.write(">" + seq_record.id + "|" + taxid + "\n"
                             + str(seq_record.seq) + "\n")

            # Original behavior: the GenBank file is removed once converted.
            os.remove(gbff_path)
# (Web page residue, not part of the script: "Comment list" / "Table of contents".)