entity.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:cdata 作者: cnschema 项目源码 文件源码
def __init__(self, entity_list):
        """Index entities by name and prime a jieba tokenizer with those names.

        Args:
            entity_list: iterable of entities, e.g.
                [{"@id": "1", "name": "..."}, {"@id": "2", "name": "..."}].
                Each entity is passed through any2unicode() before it is
                indexed; the converted entity is looked up by its "name"
                key and, when present, by every item of "alternateName".
        """
        # name -> list of entities known under that name
        self.entities = collections.defaultdict(list)
        normalized = [any2unicode(raw) for raw in entity_list]

        # First pass: primary names only, so that within each bucket the
        # entities registered by "name" come before those registered by alias.
        for record in normalized:
            self.entities[record["name"]].append(record)

        # Second pass: every alternate name points at the same entity object.
        for record in normalized:
            aliases = record.get("alternateName", [])
            for alias in aliases:
                self.entities[alias].append(record)

        # Hand the converted entities to stat() for the "name" field
        # (helper defined elsewhere in the project).
        stat(normalized, ["name"])

        # Dedicated jieba tokenizer; every indexed name is added as a word.
        self.tokenizer = jieba.Tokenizer()
        register_word = self.tokenizer.add_word
        for known_name in self.entities:
            register_word(known_name)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号