def get_all_species(self):
    """Download and parse Ensembl's ``species.txt.gz`` for ``self.release``.

    The file is fetched over anonymous FTP from
    ``self.__class__.ENSEMBL_FTP_HOST`` into a temporary file, parsed with
    ``tab2list`` and the temporary file is always removed afterwards.

    Returns:
        A list of ``[db_name, common_name, taxid]`` records (columns 1, 2
        and 7 of the file). ``taxid`` is an ``int`` when ``is_int`` accepts
        the raw value, otherwise ``None``. Records whose ``db_name`` starts
        with ``"mus_musculus_"`` (mouse strains) are dropped.

    Raises:
        Whatever ``ftplib`` / ``tab2list`` raise on connection, transfer or
        parse errors; the temporary file is cleaned up in every case.
    """
    import tempfile

    # mkstemp() atomically creates the file (unlike the deprecated, race-prone
    # mktemp()), so the path is guaranteed to exist for the cleanup below.
    # The '.txt.gz' suffix is kept because the path is handed to tab2list
    # (presumably it picks the gzip reader from the extension -- TODO confirm).
    fd, outfile = tempfile.mkstemp(suffix='.txt.gz')
    try:
        # Wrap the descriptor first so it is closed even if anything below fails.
        with os.fdopen(fd, 'wb') as out_f:
            self.logger.info('Downloading "species.txt.gz"...')
            species_file = (
                '/pub/release-%s/mysql/ensembl_production_%s/species.txt.gz'
                % (self.release, self.release)
            )
            # The context manager sends QUIT / closes the socket on exit,
            # including when login or the transfer raises.
            with FTP(self.__class__.ENSEMBL_FTP_HOST) as ftp:
                ftp.login()
                ftp.retrbinary("RETR " + species_file, out_f.write)
        self.logger.info('Done.')

        # Load the saved file (it is closed and fully flushed at this point).
        self.logger.info('Parsing "species.txt.gz"...')
        species_li = tab2list(outfile, (1, 2, 7), header=0)  # db_name,common_name,taxid
        # Use a real conditional expression: the old `a and b or None` idiom
        # silently turned a taxid of 0 into None.
        species_li = [
            x[:-1] + [int(x[-1]) if is_int(x[-1]) else None]
            for x in species_li
        ]
        # As of Ensembl 87 there are also mouse strains; keep only the
        # "original" one.
        species_li = [s for s in species_li if not s[0].startswith("mus_musculus_")]
        self.logger.info('Done.')
    finally:
        os.remove(outfile)
    return species_li
评论列表
文章目录