def get_story_question_answer_triples(sqa_file):
    """Load tokenized (story, question, answer, is_correct) records from a file.

    Each data line of ``sqa_file`` must hold exactly four tab-separated
    fields: story, question, answer and a correctness flag. Lines that start
    with ``#`` (after stripping surrounding whitespace) and blank lines are
    skipped. Non-ASCII characters are dropped before tokenizing.

    Args:
        sqa_file: Path of the UTF-8 encoded, tab-separated input file.

    Returns:
        A list of ``(swords, qwords, awords, is_correct)`` tuples. ``swords``
        holds the word tokens of every story sentence in order, ``qwords``
        and ``awords`` hold the question and answer tokens, and
        ``is_correct`` is True when the flag field parses to the integer 1.

    Raises:
        ValueError: If a data line does not contain exactly four fields, or
            its correctness flag is not an integer.
    """
    sqatriples = []
    # ``with`` guarantees the handle is closed even if a malformed line raises.
    with open(sqa_file, "rb") as fsqa:
        for line_no, raw_line in enumerate(fsqa, 1):
            # Drop non-ASCII characters, then decode back to text so the
            # string operations below behave the same on Python 2 and 3
            # (on Python 3 the intermediate value would otherwise be bytes).
            line = (
                raw_line.strip()
                .decode("utf8")
                .encode("ascii", "ignore")
                .decode("ascii")
            )
            if not line or line.startswith("#"):
                continue
            fields = line.split("\t")
            if len(fields) != 4:
                raise ValueError(
                    "{0}: line {1}: expected 4 tab-separated fields, "
                    "got {2}".format(sqa_file, line_no, len(fields))
                )
            story, question, answer, correct = fields
            # Validate the flag before the comparatively expensive tokenizing.
            is_correct = int(correct) == 1
            swords = []
            for story_sent in nltk.sent_tokenize(story):
                swords.extend(nltk.word_tokenize(story_sent))
            qwords = nltk.word_tokenize(question)
            awords = nltk.word_tokenize(answer)
            sqatriples.append((swords, qwords, awords, is_correct))
    return sqatriples
# Comment list
# Table of contents