def get_relevant_entities(self, google_cloud_entities, target_entities, target_wikipedia_urls):
    """Return the names of the entities that match one of the targets.

    An entity is considered relevant when either:

    * its ``wikipedia_url`` equals (case-insensitively) one of
      ``target_wikipedia_urls``; or
    * any whitespace-separated word of its ``name`` is similar to any
      word of any string in ``target_entities`` (``SequenceMatcher``
      ratio strictly greater than 0.7, compared case-insensitively).

    Args:
        google_cloud_entities: Iterable of objects exposing ``name`` (str)
            and ``wikipedia_url`` (str, possibly empty or ``None``).
            NOTE(review): presumably Google Cloud Natural Language
            entities -- confirm against the caller.
        target_entities: Iterable of target name strings.
        target_wikipedia_urls: Iterable of target Wikipedia URL strings.

    Returns:
        A list of entity names in input order, with at most one entry
        per input entity.
    """
    # Set membership is O(1) and the lower-casing is done only once.
    target_urls_lower = {url.lower() for url in target_wikipedia_urls}

    # Tokenise every target once instead of once per entity.  ``split()``
    # without an argument drops empty tokens: an empty token would
    # otherwise compare equal to another empty token (ratio 1.0) and
    # produce a bogus match for names containing repeated spaces.
    target_parts = [
        part
        for target_entity in target_entities
        for part in target_entity.lower().split()
    ]

    entities_to_return = []
    for google_cloud_entity in google_cloud_entities:
        # Look at Wikipedia URLs first: an exact URL hit is the stronger signal.
        wikipedia_url = google_cloud_entity.wikipedia_url
        if wikipedia_url and wikipedia_url.lower() in target_urls_lower:
            entities_to_return.append(google_cloud_entity.name)
            continue

        # Fall back to fuzzy word-by-word name comparison.  ``any`` stops at
        # the first hit, so each entity is appended at most once.
        name_parts = google_cloud_entity.name.lower().split()
        if any(
            SequenceMatcher(None, name_part, target_part).ratio() > 0.7
            for name_part in name_parts
            for target_part in target_parts
        ):
            entities_to_return.append(google_cloud_entity.name)

    return entities_to_return
评论列表
文章目录