def read(self, file_path: str):
    """Read SNLI examples from a JSON-lines file and wrap them in a ``Dataset``.

    Each non-blank line is parsed as one JSON object. Its ``sentence1``
    (premise), ``sentence2`` (hypothesis) and ``gold_label`` fields are
    passed to ``self.text_to_instance``, and the results are collected.
    Examples whose ``gold_label`` is ``'-'`` are skipped.

    Parameters
    ----------
    file_path : str
        Location of the jsonl file. It is passed through ``cached_path``
        first, so a URL is redirected to the local cache.

    Returns
    -------
    A ``Dataset`` constructed from the list of collected instances.

    Raises
    ------
    ConfigurationError
        If no instances were collected from the file.
    """
    # if `file_path` is a URL, redirect to the cache
    file_path = cached_path(file_path)
    instances = []
    # JSON text is UTF-8; state it explicitly so decoding does not depend on
    # the platform's locale default encoding.
    with open(file_path, 'r', encoding='utf-8') as snli_file:
        logger.info("Reading SNLI instances from jsonl dataset at: %s", file_path)
        for line in tqdm.tqdm(snli_file):
            if not line.strip():
                # Tolerate stray blank lines instead of failing in json.loads.
                continue
            example = json.loads(line)
            label = example["gold_label"]
            if label == '-':
                # These were cases where the annotators disagreed; we'll just skip them. It's
                # like 800 out of 500k examples in the training data.
                continue
            premise = example["sentence1"]
            hypothesis = example["sentence2"]
            instances.append(self.text_to_instance(premise, hypothesis, label))
    if not instances:
        raise ConfigurationError("No instances were read from the given filepath {}. "
                                 "Is the path correct?".format(file_path))
    return Dataset(instances)
评论列表
文章目录