def run(argv):
    """Build the genes file from the GENCODE annotation if it does not exist yet.

    If the file at ``common_filepaths['genes']`` already exists, only a message
    is printed.  Otherwise the GENCODE GTF is downloaded (unless it is already
    present at its generated path), passed through ``get_all_genes``,
    ``dedup_ensg`` and ``dedup_symbol``, and the resulting rows are written as
    a header-less, tab-separated file with the columns
    ``chrom start end symbol ensg``.

    Args:
        argv: Command-line arguments; not used by this function.
    """
    gencode_filepath = get_generated_path('sites/genes/gencode-{}.gtf.gz'.format(genes_version))
    genes_filepath = common_filepaths['genes']

    if not os.path.exists(genes_filepath):
        print('genes-{}.bed will be stored at {!r}'.format(genes_version, genes_filepath))
        if not os.path.exists(gencode_filepath):
            make_basedir(gencode_filepath)
            wget.download(
                url="ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_25/GRCh37_mapping/gencode.v25lift37.annotation.gtf.gz",
                out=gencode_filepath
            )
            # NOTE(review): presumably ends the progress line wget leaves on
            # the terminal -- confirm.
            print('')
        genes = get_all_genes(gencode_filepath)
        genes = dedup_ensg(genes)
        genes = dedup_symbol(genes)

        make_basedir(genes_filepath)
        # The existence of genes_filepath is what marks this step as done, so
        # write to a sibling temp file and move it into place only once it is
        # complete; an interrupted run must not leave a truncated genes file
        # that later runs would accept.
        tmp_filepath = genes_filepath + '.tmp'
        try:
            # newline='' keeps the csv module's '\n' terminator from being
            # translated by the text layer.
            with open(tmp_filepath, 'w', newline='') as f:
                writer = csv.DictWriter(f, delimiter='\t', fieldnames='chrom start end symbol ensg'.split(), lineterminator='\n')
                writer.writerows(genes)
            os.replace(tmp_filepath, genes_filepath)
        finally:
            # After a successful replace the temp file no longer exists; this
            # only cleans up a partial file left by a failed write.
            if os.path.exists(tmp_filepath):
                os.remove(tmp_filepath)

    else:
        print("gencode is at {!r}".format(genes_filepath))
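# 评论列表 (comment list) -- web-page navigation residue, not part of the program
# 文章目录 (article table of contents) -- web-page navigation residue, not part of the program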