def get_ref_microbe_taxids():
    """
    Download the bacterial genome assembly summary from the NCBI genomes FTP
    site and collect the taxids of the bacterial reference genomes.

    Before returning, the list is also passed to ``dump`` together with a
    file name of the form ``ref_microbe_taxids_<timestamp>.pyobj``, where the
    timestamp comes from ``get_timestamp()``.

    :return: list of taxids (as strings, in file order), one for every row
        whose category column is exactly ``'reference genome'``.
    """
    import csv
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    urlbase = 'ftp://ftp.ncbi.nlm.nih.gov'
    urlextension = '/genomes/refseq/bacteria/assembly_summary.txt'

    # closing() guarantees the connection is released even if read() fails,
    # and works for response objects that are not context managers.
    with closing(urlopen(urlbase + urlextension)) as assembly:
        content = assembly.read()

    # Python 3 hands back bytes, which csv.reader cannot consume. Only the
    # category and taxid columns are read below, so replacing any
    # undecodable byte elsewhere in a row does not affect the result.
    if not isinstance(content, str):
        content = content.decode('utf-8', 'replace')

    datareader = csv.reader(content.splitlines(), delimiter="\t")
    # Index 4 is the category column and index 5 the taxid column. Blank
    # lines and '#' comment lines without tabs yield rows that are too
    # short to index, so skip them instead of raising IndexError.
    taxid = [
        row[5]
        for row in datareader
        if len(row) > 5 and row[4] == 'reference genome'
    ]

    ts = get_timestamp()
    dump(taxid, "ref_microbe_taxids_{}.pyobj".format(ts))
    return taxid
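# 评论列表 (comment list -- web page navigation text, not part of the code)
# 文章目录 (table of contents -- web page navigation text, not part of the code)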