def parseSimpleTag(text,ignoreEntities=None):
    """
    Parse a snippet of tagged text into a kindred.Document.

    The snippet is wrapped in a <doc> root element, parsed as XML with
    minidom, and the root node is handed to parseSimpleTag_helper, which
    yields the plain text, the (unmerged) entities and the relations.
    Entities whose sourceEntityID is the empty string are treated as having
    no ID. If no entity has an ID, IDs 1, 2, 3, ... are assigned in the order
    the helper returned them. Entities are then passed through
    mergeEntitiesWithMatchingIDs before the document is built.

    :param text: Tagged text to parse; it is interpolated into a <doc> wrapper and parsed as XML
    :param ignoreEntities: Forwarded as-is to parseSimpleTag_helper (an empty list is used when omitted)
    :return: A kindred.Document built from the text, merged entities and relations
    :raises AssertionError: If only some entities have IDs, or if relations are present while entities lack IDs
    """
    # Use a fresh list per call rather than a shared mutable default argument
    if ignoreEntities is None:
        ignoreEntities = []

    # Wrap the snippet in a single <doc> root element before XML parsing
    docText = u"<doc>%s</doc>" % text
    xmldoc = minidom.parseString(docText.encode('utf8'))
    docNode = xmldoc.childNodes[0]

    text,unmergedEntities,relations = parseSimpleTag_helper(docNode,ignoreEntities=ignoreEntities)

    missingSourceEntityID = [ e.sourceEntityID == '' for e in unmergedEntities ]
    anyMissing = any(missingSourceEntityID)
    allMissing = all(missingSourceEntityID)

    # Raised explicitly (not via assert) so that the checks still run under
    # python -O; the exception type and messages are unchanged.
    if anyMissing and not allMissing:
        raise AssertionError('All entities or none (not some) should be given IDs')
    if anyMissing and len(relations) != 0:
        raise AssertionError("Cannot include relations with no-ID entities")

    # No entity carries an ID: number them sequentially, starting at 1
    if allMissing:
        for newID,e in enumerate(unmergedEntities, start=1):
            e.sourceEntityID = newID

    entities = mergeEntitiesWithMatchingIDs(unmergedEntities)

    combinedData = kindred.Document(text,entities=entities,relations=relations)
    return combinedData
评论列表
文章目录