download_dataset.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:keras-molecules 作者: maxhodak 项目源码 文件源码
def main():
    """Download a dataset and store it as an HDF5 table.

    The source URI, the output path and the dataset name come from
    ``get_arguments()``. The download goes to a temporary file, is parsed
    into a pandas DataFrame according to the dataset name, and is then
    written to ``outfile`` under the key ``'table'``.

    Dataset handling:
      * ``'zinc12'``   -- tab-delimited; column ``SMILES`` is renamed to
        ``structure``.
      * ``'chembl22'`` -- gzip-compressed table; column
        ``canonical_smiles`` is renamed to ``structure``.
      * anything else  -- tab-delimited, columns left unchanged.
    """
    uri, outfile, dataset = get_arguments()
    progress = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()])

    def update(count, block_size, total_size):
        # urlretrieve reports a non-positive total size when the server
        # does not announce the length; there is no meaningful range to
        # show then, so skip progress reporting instead of feeding the
        # bar a negative maximum.
        if total_size <= 0:
            return
        if progress.maxval is None:
            # First callback with a known size: set the range and start.
            progress.maxval = total_size
            progress.start()
        # The final block may overshoot the real size; clamp to the total.
        progress.update(min(count * block_size, total_size))

    # The context manager guarantees the temporary file is closed (and
    # thereby removed) even if the download or the parsing fails.
    with tempfile.NamedTemporaryFile() as fd:
        urllib.urlretrieve(uri, fd.name, reporthook=update)
        if progress.maxval is not None:
            # Only a bar that was actually started can be finished.
            progress.finish()

        if dataset == 'zinc12':
            df = pandas.read_csv(fd.name, delimiter='\t')
            df = df.rename(columns={'SMILES': 'structure'})
        elif dataset == 'chembl22':
            df = pandas.read_table(fd.name, compression='gzip')
            df = df.rename(columns={'canonical_smiles': 'structure'})
        else:
            df = pandas.read_csv(fd.name, delimiter='\t')

    # The frame is fully in memory here, so the temporary file is no
    # longer needed when writing the output.
    df.to_hdf(outfile, 'table', format='table', data_columns=True)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号