def convertBiocDocToKindredDocs(document):
    """
    Convert a BioC document into kindred Documents, one per passage.

    Every annotation in a passage becomes a kindred.Entity. Its positions
    are (start, end) pairs relative to the start of the passage text, and
    its text is the located segments joined with single spaces. Every
    relation in a passage becomes a kindred.Relation whose arguments are
    ordered by sorting the (role, refid) pairs of its nodes. The metadata
    of each resulting Document is the document infons overlaid with the
    passage infons, plus an 'id' key holding the BioC document id.

    :param document: The BioC document to convert
    :type document: bioc.BioCDocument
    :return: The converted passages, in the order they appear
    :rtype: list of kindred.Document
    """
    assert isinstance(document, bioc.BioCDocument)

    converted = []
    for passage in document.passages:
        assert isinstance(passage, bioc.BioCPassage)

        passageText = passage.text
        passageStart = int(native(passage.offset))

        passageEntities = []
        passageRelations = []

        for annotation in passage.annotations:
            assert isinstance(annotation, bioc.BioCAnnotation)

            annotationType = annotation.infons['type']
            annotationID = annotation.id

            spans = []
            pieces = []
            for location in annotation.locations:
                assert isinstance(location, bioc.BioCLocation)

                # BioC offsets are converted to passage-relative ones by
                # subtracting the passage's own offset.
                begin = int(native(location.offset)) - passageStart
                end = begin + int(native(location.length))

                spans.append((begin, end))
                pieces.append(passageText[begin:end])

            # An annotation with several locations has its segments joined
            # with single spaces.
            joinedText = " ".join(pieces)
            passageEntities.append(
                kindred.Entity(annotationType, joinedText, spans, annotationID)
            )

        for biocRelation in passage.relations:
            assert isinstance(biocRelation, bioc.BioCRelation)

            relationType = biocRelation.infons['type']

            roleAndRef = []
            for node in biocRelation.nodes:
                assert isinstance(node, bioc.BioCNode)
                roleAndRef.append((node.role, node.refid))

            # Sorting the (role, refid) pairs fixes the argument order.
            roleAndRef.sort()

            entityIDs = [refid for _, refid in roleAndRef]
            argNames = [role for role, _ in roleAndRef]

            passageRelations.append(
                kindred.Relation(
                    relationType=relationType,
                    entityIDs=entityIDs,
                    argNames=argNames,
                )
            )

        # Passage-level infons override document-level ones; 'id' is always
        # the document id.
        metadata = dict(document.infons)
        metadata.update(passage.infons)
        metadata['id'] = document.id

        converted.append(
            kindred.Document(
                passageText,
                entities=passageEntities,
                relations=passageRelations,
                metadata=metadata,
            )
        )

    return converted
评论列表
文章目录