loadFunctions.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:kindred 作者: jakelever 项目源码 文件源码
def convertBiocDocToKindredDocs(document):
    """Turn one BioC document into a list of kindred documents.

    Each passage of the BioC document becomes its own kindred.Document.

    Args:
        document: A bioc.BioCDocument whose passages are converted.

    Returns:
        A list with one kindred.Document per passage, in passage order.

    Raises:
        AssertionError: If the document, or any passage, annotation,
            location, relation or node inside it, is not of the expected
            bioc type.
        KeyError: If an annotation or relation has no 'type' infon.
    """
    assert isinstance(document,bioc.BioCDocument)

    converted = []
    for passage in document.passages:
        assert isinstance(passage,bioc.BioCPassage)

        passageText = passage.text
        passageStart = int(native(passage.offset))

        # Annotations -> entities
        entities = []
        for annotation in passage.annotations:
            assert isinstance(annotation,bioc.BioCAnnotation)

            entityType = annotation.infons['type']
            sourceEntityID = annotation.id

            spans = []
            pieces = []
            for location in annotation.locations:
                assert isinstance(location,bioc.BioCLocation)
                # BioC location offsets are shifted by the passage offset so
                # that they index directly into the passage text.
                begin = int(native(location.offset)) - passageStart
                end = begin + int(native(location.length))
                spans.append((begin,end))
                pieces.append(passageText[begin:end])

            # An entity with several locations has its text pieces joined
            # by single spaces.
            entities.append(kindred.Entity(entityType," ".join(pieces),spans,sourceEntityID))

        # Relations
        relations = []
        for bioRelation in passage.relations:
            assert isinstance(bioRelation,bioc.BioCRelation)
            relationType = bioRelation.infons['type']

            roleRefPairs = []
            for node in bioRelation.nodes:
                assert isinstance(node,bioc.BioCNode)
                roleRefPairs.append((node.role,node.refid))
            # Order the arguments by (role, refid) so that argument names and
            # entity IDs line up in a deterministic order.
            roleRefPairs.sort()

            argNames = [ pair[0] for pair in roleRefPairs ]
            entityIDs = [ pair[1] for pair in roleRefPairs ]

            relations.append(kindred.Relation(relationType=relationType,entityIDs=entityIDs,argNames=argNames))

        # Passage infons override document infons; 'id' is always the
        # document's ID.
        metadata = dict(document.infons)
        metadata.update(passage.infons)
        metadata['id'] = document.id

        converted.append(kindred.Document(passageText,entities=entities,relations=relations,metadata=metadata))

    return converted
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号