def getCandidates(self, mention, threshold=0.7):
    """Collect candidate entities whose title is similar to ``mention``.

    Every row of ``self.db_titles`` and ``self.db_disambiguations`` is scored
    with ``Levenshtein.ratio(title, mention)``. Rows scoring strictly above
    ``threshold`` become ``Entity`` objects; for each of them the abstract
    and the outgoing link ids are looked up through ``self.cur``.

    Args:
        mention: Text compared against each stored title.
        threshold: Exclusive lower bound on the similarity ratio.

    Returns:
        A list of ``Entity`` objects: matches from ``self.db_titles`` first,
        then matches from ``self.db_disambiguations``, each in source order.
    """
    cursor = self.cur

    # NOTE(review): the SQL text is still built with string formatting, as in
    # the original. The ids come from rows already loaded on ``self``; if they
    # can ever originate elsewhere, switch to the driver's parameter binding
    # (the placeholder style depends on the DB-API driver -- confirm).

    def load_context(entity_id, extras=()):
        # Fetch the JSON-encoded abstract of one entity and decode it.
        cursor.execute("select abstract from abstract where id = %s" % entity_id)
        rows = cursor.fetchall()
        if not rows:
            # No abstract stored: pass the driver's empty result through
            # unchanged. Truthiness covers both ``()`` and ``[]`` results.
            return rows
        context = json.loads(rows[0][0])
        # Only extend a context that was actually loaded; an empty fetch
        # result may be an immutable tuple.
        for extra in extras:
            context.append(extra)
        return context

    def load_link_targets(entity_id):
        # Collect the ids this entity links to (empty list when none).
        cursor.execute("select to_id from link where from_id = %s" % entity_id)
        return [row[0] for row in cursor.fetchall()]

    candidates = []

    # Pass 1: plain title entries.
    for entity_id, title, link_count in self.db_titles:
        score = Levenshtein.ratio(title, mention)
        if score > threshold:
            context = load_context(entity_id)
            linked_ids = load_link_targets(entity_id)
            candidates.append(
                Entity(title, entity_id, score, context, link_count, linked_ids)
            )

    # Pass 2: disambiguation entries. The score uses the bare title; the
    # disambiguation text is then attached to the title and to the context.
    for entity_id, title, dis_context, link_count in self.db_disambiguations:
        score = Levenshtein.ratio(title, mention)
        if score > threshold:
            labelled_title = title + '[%s]' % dis_context
            context = load_context(entity_id, extras=(dis_context,))
            linked_ids = load_link_targets(entity_id)
            candidates.append(
                Entity(labelled_title, entity_id, score, context,
                       link_count, linked_ids)
            )

    return candidates
评论列表
文章目录