def main():
    """Download a tab-separated dataset and store it as an HDF5 table.

    The URI, output path and dataset name come from ``get_arguments()``.
    The remote file is fetched into a temporary file (with a console
    progress bar), parsed with pandas, and written to ``outfile`` under the
    key ``'table'`` in queryable ``table`` format.

    Dataset-specific handling:
      * ``'zinc12'``   -- column ``SMILES`` is renamed to ``structure``.
      * ``'chembl22'`` -- the download is gzip-compressed and column
        ``canonical_smiles`` is renamed to ``structure``.
      * anything else  -- the table is stored without renaming.
    """
    # urlretrieve lives in urllib.request on Python 3 and in urllib on
    # Python 2; import locally so the function works on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    uri, outfile, dataset = get_arguments()

    # Name of the column holding the structure string, per known dataset.
    structure_columns = {
        'zinc12': 'SMILES',
        'chembl22': 'canonical_smiles',
    }

    progress = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA(),
                                    ' ', FileTransferSpeed()])
    # Mutable cell so the nested hook can record that the bar was started
    # (no ``nonlocal`` on Python 2).
    state = {'started': False}

    def update(count, block_size, total_size):
        """Report hook for urlretrieve: advance the progress bar."""
        # urlretrieve reports -1 when the server sends no Content-Length;
        # a non-positive size cannot drive a percentage bar, so skip it.
        if total_size <= 0:
            return
        if not state['started']:
            # The total size is only known once the transfer has begun.
            progress.maxval = total_size
            progress.start()
            state['started'] = True
        # The final block may overshoot the real size; clamp to the maximum.
        progress.update(min(count * block_size, total_size))

    read_kwargs = {'delimiter': '\t'}
    if dataset == 'chembl22':
        # The temporary file has no extension, so compression cannot be
        # inferred from its name and must be given explicitly.
        read_kwargs['compression'] = 'gzip'

    # The context manager guarantees the temporary file is closed (and thus
    # deleted) even if the download or the parsing fails.
    with tempfile.NamedTemporaryFile() as fd:
        urlretrieve(uri, fd.name, reporthook=update)
        if state['started']:
            progress.finish()
        df = pandas.read_csv(fd.name, **read_kwargs)

    if dataset in structure_columns:
        df = df.rename(columns={structure_columns[dataset]: 'structure'})

    df.to_hdf(outfile, 'table', format='table', data_columns=True)
评论列表
文章目录