def set_entity_match(self):
    '''
    Match other entities appearing in the article with DBpedia abstract.

    Counts the distinct strings shared between
    ``self.cluster.context_entity_parts`` and the tokens of
    ``self.abstract_bow`` that are longer than four characters, then
    stores ``tanh(0.25 * count)`` in ``self.match_txt_entities``.

    The method returns without setting ``self.match_txt_entities`` when
    the ``'match_txt_entities'`` feature is not in ``self.features`` or
    when the cluster has no context entity parts.

    Missing inputs are computed on demand through
    ``self.cluster.get_entity_parts()``,
    ``self.cluster.get_context_entity_parts()`` and
    ``self.tokenize_abstract()``.
    '''
    if 'match_txt_entities' not in self.features:
        return

    cluster = self.cluster
    # entity_parts is not read here directly; presumably
    # get_context_entity_parts() depends on it -- TODO confirm.
    if not hasattr(cluster, 'entity_parts'):
        cluster.get_entity_parts()
    if not hasattr(cluster, 'context_entity_parts'):
        cluster.get_context_entity_parts()
    if not cluster.context_entity_parts:
        return

    if not hasattr(self, 'abstract_bow'):
        self.tokenize_abstract()

    # Only tokens longer than four characters take part in the match.
    long_tokens = {t for t in self.abstract_bow if len(t) > 4}
    entity_match = len(set(cluster.context_entity_parts) & long_tokens)

    # tanh keeps the score in [0, 1) however many parts overlap.
    self.match_txt_entities = math.tanh(entity_match * 0.25)
评论列表
文章目录