def download_gbk(assemb_tab, cmd, outdir = '.'):
    '''
    Run the aspera command over a source/destination pairs file built from
    an assembly table.

    Each row of `assemb_tab` is turned into a dictionary by
    `parse_aseemb_rows`. The 'source' and 'dest' entries of every dictionary
    are written, one per line, to "aspera_assemblies_src_dest.txt" in the
    current working directory. Handing that pairs file to ascp makes it
    possible to download everything with a single call (the original notes
    name the option --file-pair-file; confirm the spelling against the ascp
    manual). The following flags can be added to the ascp command:
    --overwrite=diff -k2
    where -k2 means files are compared with sparse checksums.

    Parameters:
    assemb_tab: table of assemblies; only its `itertuples()` method is used
        here (presumably a pandas DataFrame -- confirm against the caller).
    cmd: command template, filled in with `str.format` using three
        positional values: `outdir`, the pairs file name and `outdir` again.
    outdir: directory passed to the command template and to
        `parse_aspera_manifest_file` (default: '.').

    Returns:
    The list of dictionaries built by `parse_aseemb_rows`, one per row.

    A non-zero exit status of the command is reported on stderr but does
    not stop the function.
    '''
    pairs_file = "aspera_assemblies_src_dest.txt"
    assembs_dic_list = [parse_aseemb_rows(row) for row in assemb_tab.itertuples()]
    # The context manager closes the pairs file even if a write fails or a
    # dictionary is missing the 'source' or 'dest' key.
    with open(pairs_file, 'w') as fo:
        fo.writelines(
            "{}\n{}\n".format(assembly['source'], assembly['dest'])
            for assembly in assembs_dic_list
        )
    cmd = cmd.format(outdir, pairs_file, outdir)
    print("Running the aspera cmd: {}".format(cmd), file = sys.stderr)
    p = subprocess.Popen(shlex.split(cmd))
    p.communicate()
    if p.returncode != 0:
        # Only warn: callers so far have never seen an exception from here.
        print("Warning: the aspera cmd exited with status {}".format(p.returncode), file = sys.stderr)
    files_to_check = [a['gbk_file'] for a in assembs_dic_list]
    # The return value was never used by this function, so it is not bound
    # to a name.
    parse_aspera_manifest_file(outdir, files_to_check, "aspera_assemblies_manifest.txt")
    print("Finished downloading all genomes.", file = sys.stderr)
    return assembs_dic_list
### LOADING GENOMES TO THE KRAKEN STAGING AREA #################################
评论列表
文章目录