# db.py source file - Python code snippet

def getCandidates(self, mention, threshold=0.7):
        """Collect candidate entities whose title is similar to ``mention``.

        Every row of ``self.db_titles`` and ``self.db_disambiguations`` is
        scored with ``Levenshtein.ratio(title, mention)``; rows scoring
        strictly above ``threshold`` are turned into ``Entity`` objects.

        Args:
            mention: Text to compare against each stored title.
            threshold: Exclusive lower bound on the similarity score.

        Returns:
            A list of ``Entity`` objects, title matches first, followed by
            disambiguation matches, each in the iteration order of its
            source collection.  The list is empty when nothing matches.
        """
        def run_query(sql):
            # Execute one statement on the shared cursor and return all rows.
            self.cur.execute(sql)
            return self.cur.fetchall()

        # NOTE(review): ids are interpolated into the SQL text.  They come
        # from self.db_titles / self.db_disambiguations rather than from
        # `mention`; switching to driver-level parameter binding needs the
        # driver's paramstyle, which is not visible from here.
        abstract_sql = "select abstract from abstract where id = %s"
        links_sql = "select to_id from link where from_id = %s"

        res = []

        # Candidates drawn from self.db_titles.
        for entity_id, title, link_count in self.db_titles:
            m_score = Levenshtein.ratio(title, mention)
            if m_score <= threshold:
                continue

            rows = run_query(abstract_sql % entity_id)
            # Truthiness instead of `!= ()`: works whether the driver hands
            # back an empty tuple or an empty list.  With no abstract row the
            # empty fetch result itself is passed on as the context.
            context = json.loads(rows[0][0]) if rows else rows

            RE = [row[0] for row in run_query(links_sql % entity_id)]
            res.append(Entity(title, entity_id, m_score, context, link_count, RE))

        # Candidates drawn from self.db_disambiguations.
        for entity_id, title, dis_context, link_count in self.db_disambiguations:
            # The score is computed on the bare title, before the
            # disambiguation text is appended to it.
            m_score = Levenshtein.ratio(title, mention)
            if m_score <= threshold:
                continue

            title += '[%s]' % dis_context

            rows = run_query(abstract_sql % entity_id)
            if rows:
                context = json.loads(rows[0][0])
                context.append(dis_context)
            else:
                # NOTE(review): dis_context is not added to the context when
                # the entity has no abstract row -- confirm this is intended.
                context = rows

            RE = [row[0] for row in run_query(links_sql % entity_id)]
            res.append(Entity(title, entity_id, m_score, context, link_count, RE))

        return res