def search_docs(inputs, max_ex=5, opts=None):
"""Given a set of document ids (returned by ranking for a question), search
for top N best matching (by heuristic) paragraphs that contain the answer.
"""
if not opts:
raise RuntimeError('Options dict must be supplied.')
doc_ids, q_tokens, answer = inputs
examples = []
for i, doc_id in enumerate(doc_ids):
for j, paragraph in enumerate(re.split(r'\n+', fetch_text(doc_id))):
found = find_answer(paragraph, q_tokens, answer, opts)
if found:
# Reverse ranking, giving priority to early docs + paragraphs
score = (found[0], -i, -j, random.random())
if len(examples) < max_ex:
heapq.heappush(examples, (score, found[1]))
else:
heapq.heappushpop(examples, (score, found[1]))
return [e[1] for e in examples]
评论列表
文章目录