def __process_loc_results__(self, results, label):
    """Pick the LOC entry whose title best matches a label.

    Scans the rows returned by a LOC ID call, keeps the rows whose first
    element starts with ``atom:entry``, scores each entry's title against
    ``label`` with ``fuzz.ratio`` and returns the best-scoring entry when
    its score reaches ``self.cutoff``.

    Args:
        results(list): List of JSON rows from LOC ID call
        label(str): Original Label

    Returns:
        tuple: ``(rdflib.URIRef, rdflib.Literal)`` built from the winning
            entry's link ``href`` and title, or ``(None, None)`` when no
            entry scores at or above ``self.cutoff``.
    """
    # Maps link href -> (weight, title); a repeated href overwrites the
    # earlier score but keeps its original position in the dict.
    term_weights = dict()
    for row in results:
        # Dict rows carry no entry; rows too short to hold both a title
        # element (index 2) and a link element (index 3) are skipped
        # instead of raising IndexError.
        if isinstance(row, dict) or len(row) < 4:
            continue
        if not row[0].startswith('atom:entry'):
            continue
        # The title is resolved per entry so that an entry without one is
        # never scored against the title of the entry before it.
        if not row[2][0].startswith("atom:title"):
            continue
        title = row[2][-1]
        if not row[3][0].startswith("atom:link"):
            continue
        loc_url = row[3][-1].get('href')
        if not loc_url:
            # A link without an href cannot be turned into a URI.
            continue
        term_weights[loc_url] = (fuzz.ratio(label, title), title)
    if not term_weights:
        return None, None
    # max() returns the first maximal item it sees; walking the candidates
    # in reverse makes ties resolve to the most recently recorded entry.
    loc_url, (weight, title) = max(
        reversed(list(term_weights.items())),
        key=lambda candidate: candidate[1][0])
    # Only the best candidate needs checking: if it misses the cutoff,
    # every lower-scoring candidate misses it as well.
    if weight >= self.cutoff:
        return rdflib.URIRef(loc_url), rdflib.Literal(title)
    return None, None
# Comment list
# Table of contents