dump.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:mygene.info 作者: biothings 项目源码 文件源码
def get_all_species(self):
        import tempfile
        outfile = tempfile.mktemp() + '.txt.gz'
        try:
            self.logger.info('Downloading "species.txt.gz"...')
            out_f = open(outfile, 'wb')
            ftp = FTP(self.__class__.ENSEMBL_FTP_HOST)
            ftp.login()
            species_file = '/pub/release-%s/mysql/ensembl_production_%s/species.txt.gz' % (self.release, self.release)
            ftp.retrbinary("RETR " + species_file, out_f.write)
            out_f.close()
            self.logger.info('Done.')

            #load saved file
            self.logger.info('Parsing "species.txt.gz"...')
            species_li = tab2list(outfile, (1, 2, 7), header=0)   # db_name,common_name,taxid
            species_li = [x[:-1] + [is_int(x[-1]) and int(x[-1]) or None] for x in species_li]
            # as of ensembl 87, there are also mouse strains. keep only the "original" one
            species_li = [s for s in species_li if not s[0].startswith("mus_musculus_")]
            self.logger.info('Done.')
        finally:
            os.remove(outfile)
            pass

        return species_li
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号