def download_ref(build, chrom):
    '''Fetch one chromosome's reference sequence and store it as a flat, seekable file.

    The gzipped FASTA for `chrom` is downloaded from the UCSC goldenPath FTP
    site for the genome build named by `build['hg']`.  Its first line (the
    header) and all newlines are then stripped, and the result is written to
    the path given by `ref_filepath(build, chrom, download=False)`.

    Does nothing if that destination file already exists.  Returns None.
    '''
    final_path = ref_filepath(build, chrom, download=False)
    if os.path.exists(final_path):
        return

    # Step 1: make sure the compressed download is present.
    archive_path = get_tmp_path('dl-chrom-{}-{}'.format(build['hg'], chrom))
    archive_missing = not os.path.exists(archive_path)
    if archive_missing:
        # Download under a separate temporary name and rename afterwards, so
        # `archive_path` only ever appears once wget.download has returned.
        partial_path = get_tmp_path(archive_path)
        wget.download(
            url='ftp://hgdownload.cse.ucsc.edu/goldenPath/{}/chromosomes/chr{}.fa.gz'.format(build['hg'], chrom),
            out=partial_path,
        )
        print('')  # presumably terminates wget's progress output line
        os.rename(partial_path, archive_path)

    # Step 2: decompress, drop the first line, and delete every newline.
    # The output goes to a staging path first and is renamed into place last.
    staging_path = get_tmp_path(final_path)
    flatten_script = r'''
gzip -cd '{dl_filepath}' |
tail -n +2 |
tr -d "\n" > '{tmp_filepath}'
'''.format(dl_filepath=archive_path, tmp_filepath=staging_path)
    run_script(flatten_script)
    os.rename(staging_path, final_path)

    print("ref is at", final_path)
评论列表
文章目录