def load_data(opt):
with open('SQuAD/meta.msgpack', 'rb') as f:
meta = msgpack.load(f, encoding='utf8')
embedding = torch.Tensor(meta['embedding'])
opt['pretrained_words'] = True
opt['vocab_size'] = embedding.size(0)
opt['embedding_dim'] = embedding.size(1)
opt['pos_size'] = len(meta['vocab_tag'])
opt['ner_size'] = len(meta['vocab_ent'])
with open(args.data_file, 'rb') as f:
data = msgpack.load(f, encoding='utf8')
train = data['train']
data['dev'].sort(key=lambda x: len(x[1]))
dev = [x[:-1] for x in data['dev']]
dev_y = [x[-1] for x in data['dev']]
return train, dev, dev_y, embedding, opt
评论列表
文章目录