def prepare_data():
    """Create the working directories and fetch the SQuAD v1.1 data and embeddings.

    Steps, in order:
      1. Call ``make_dirs`` for the cache, embedding, SQuAD, trained-model
         and checkpoint directories.
      2. Download NLTK's "punkt" resource.
      3. Fetch the SQuAD v1.1 train and dev JSON files into ``data/squad``.
      4. Fetch the pretrained GloVe character-embedding file into
         ``data/embedding/char`` under the name ``glove_char.840B.300d.txt``.

    NOTE(review): ``make_dirs`` and ``maybe_download`` are defined outside
    this block; presumably both are no-ops when the target already exists --
    confirm against the helpers before relying on re-runs being cheap.
    """
    # Directory layout expected by the rest of the pipeline (order preserved).
    for directory in (
        "data/cache",
        "data/embedding/char",
        "data/embedding/word",
        "data/squad",
        "data/trained_model",
        "checkpoint",
    ):
        make_dirs(directory)

    nltk.download("punkt")

    # SQuAD v1.1 train/dev sets.
    squad_base_url = "https://rajpurkar.github.io/SQuAD-explorer/dataset/"
    download_prefix = os.path.join("data", "squad")
    for squad_filename in ("train-v1.1.json", "dev-v1.1.json"):
        # URLs always use "/" regardless of platform, so build them by plain
        # concatenation (the base URL ends with "/") rather than with
        # os.path.join, which is meant for filesystem paths.
        squad_url = squad_base_url + squad_filename
        maybe_download(squad_url, download_prefix, squad_filename)

    # Pretrained GloVe character embeddings; stored under a local name that
    # differs from the remote file name.
    char_embedding_pretrain_url = (
        "https://raw.githubusercontent.com/minimaxir/char-embeddings/"
        "master/glove.840B.300d-char.txt"
    )
    char_embedding_filename = "glove_char.840B.300d.txt"
    maybe_download(
        char_embedding_pretrain_url,
        "data/embedding/char",
        char_embedding_filename,
    )
评论列表
文章目录