def _fasta_path_for(genbank_zip_file):
    """Return the FASTA output path (in the current directory) for a GenBank archive."""
    suffix = '.gbff.gz'
    base = os.path.basename(genbank_zip_file)
    # Remove the extension as a true suffix.  str.strip('gbff.gz') would
    # instead strip any run of the characters g, b, f, '.', z from BOTH ends
    # of the name and can eat into the assembly name itself.
    if base.endswith(suffix):
        base = base[:-len(suffix)]
    return os.path.join(os.getcwd(), base + ".fa")


def add_isolates(dic_list, db_name):
    """Convert gzipped GenBank assemblies to FASTA and hand each one to kraken_add.

    For every assembly, each GenBank record is rewritten as a FASTA record
    whose id is ``gi|<gi>``, keeping the original name and description. The
    FASTA file is written to the current working directory and then passed
    to ``kraken_add(db_name, fa_file)``.

    Args:
        dic_list: Sized iterable of dicts. Each dict must provide
            ``'organism'`` (used only in the progress message) and ``'dest'``
            (path to a gzip-compressed GenBank file).
        db_name: Forwarded unchanged as the first argument of ``kraken_add``.

    Raises:
        KeyError: If an assembly dict lacks ``'organism'`` or ``'dest'``, or
            if a parsed record has no ``'gi'`` entry in its annotations.
    """
    for assembly in dic_list:
        print("Adding {} to kraken stagging area.".format(assembly['organism']), file=sys.stderr)
        genbank_zip_file = assembly['dest']

        # The context manager closes the archive even if parsing fails midway.
        with gzip.open(genbank_zip_file, 'rt') as fi:
            new_seqs = [
                SeqIO.SeqRecord(
                    s.seq,
                    id='gi|{}'.format(s.annotations['gi']),
                    name=s.name,
                    description=s.description,
                )
                for s in SeqIO.parse(fi, 'genbank')
            ]

        fa_file = _fasta_path_for(genbank_zip_file)
        with open(fa_file, 'wt') as tmpf:
            SeqIO.write(new_seqs, tmpf, 'fasta')

        # NOTE: the intermediate FASTA file is left on disk; nothing here
        # removes it after kraken_add returns.
        kraken_add(db_name, fa_file)

    print("Added all {} assemblies to kraken stagging area. DB is ready to build".format(len(dic_list)), file=sys.stderr)
### ACTUALLY BUILD THE DATABASE ################################################
# (Web-page residue removed from code path: navigation labels "comment list" / "table of contents".)