dac.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:dac 作者: jlonij 项目源码 文件源码
def set_levenshtein(self):
        '''
        Set the Levenshtein-ratio string-match features on this instance.

        Does nothing unless at least one name in ``self.features`` starts
        with ``'match_str_lsr'``. Otherwise the ``norm`` value of the first
        entity in ``self.cluster`` is compared against the labels found in
        ``self.document``:

        - ``match_str_lsr_pref``: ratio against ``pref_label``.
        - ``match_str_lsr_wd_max`` / ``match_str_lsr_wd_mean``: max and
          mean ratio over ``wd_alt_label``, set only when that field is
          non-empty.
        - ``match_str_lsr_alt_max`` / ``match_str_lsr_alt_mean``: max and
          mean ratio over the ``alt_label`` entries that are not also in
          ``wd_alt_label``, set only when any such entries remain.

        The max values are shifted by -0.5 and the mean values by -0.375
        (NOTE(review): presumably centering offsets -- confirm against the
        model that consumes these features).
        '''
        lsr_features = [name for name in self.features
                        if name.startswith('match_str_lsr')]
        if not lsr_features:
            # None of the requested features needs a Levenshtein ratio.
            return

        mention = self.cluster.entities[0].norm
        doc = self.document

        # Preferred label
        self.match_str_lsr_pref = Levenshtein.ratio(
            mention, doc.get('pref_label'))

        # Wikidata alternative labels; a missing or empty field counts as
        # "no labels" and leaves the wd attributes untouched.
        wd_labels = doc.get('wd_alt_label') or []
        if wd_labels:
            wd_scores = [Levenshtein.ratio(mention, label)
                         for label in wd_labels]
            self.match_str_lsr_wd_max = max(wd_scores) - 0.5
            wd_average = sum(wd_scores) / float(len(wd_labels))
            self.match_str_lsr_wd_mean = wd_average - 0.375

        # Remaining alternative labels, skipping any already scored above
        # as Wikidata labels.
        other_labels = [label for label in (doc.get('alt_label') or [])
                        if label not in wd_labels]
        if other_labels:
            alt_scores = [Levenshtein.ratio(mention, label)
                          for label in other_labels]
            self.match_str_lsr_alt_max = max(alt_scores) - 0.5
            alt_average = sum(alt_scores) / float(len(other_labels))
            self.match_str_lsr_alt_mean = alt_average - 0.375
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号