def __init__(self, file_name):
    """Read a TEI XML file and parse its ``<TEI>`` element.

    The file is split into three parts: everything before the first
    ``<TEI`` (kept verbatim in ``self.before_root``), the ``<TEI>...</TEI>``
    span (fed to the XML parser), and everything after the first
    ``</TEI>`` (kept verbatim in ``self.after_root``). The value returned
    by the parser's ``close()`` is stored in ``self.root``.

    Args:
        file_name: Path of the XML file; it is opened as UTF-8 text.

    Raises:
        ValueError: If the file content contains none of the spellings
            ``utf-8``, ``utf8``, ``UTF8``, ``UTF-8``, or if ``<TEI`` or
            ``</TEI>`` cannot be found.
        SystemExit: With status 15 when the ``<TEI>`` span is not
            well-formed XML; the parser message is written to stderr.
    """
    with open(file_name, encoding='utf-8') as file:
        content = file.read()

    # Plain substring test over the whole file: any mention of a UTF-8
    # label anywhere in the content is accepted as the encoding marker.
    if not any(u in content for u in ('utf-8', 'utf8', 'UTF8', 'UTF-8')):
        raise ValueError("XML file is not encoded in UTF-8. Please recode "
                         "the file or extend this parser and XML writer.")

    tei_start = content.find('<TEI')
    if tei_start < 0:
        raise ValueError("Couldn't find string `<TEI` in the XML file. Please extend this parser.")
    # Keep the prolog (everything before the root element) verbatim.
    self.before_root = content[:tei_start]
    content = content[tei_start:]

    tei_end = content.find('</TEI>')
    if tei_end < 0:
        raise ValueError("Couldn't find `</TEI>` in the input file, please extend the parser.")
    # Move past the closing tag so that it stays part of the parsed span.
    tei_end += len('</TEI>')
    self.after_root = content[tei_end:]
    content = content[:tei_end]

    parser = ET.XMLParser(target=CommentedTreeBuilder())
    try:
        parser.feed(content)
        # close() finishes parsing and can raise ParseError as well (e.g.
        # for a truncated element), so it must be covered by the handler.
        self.root = parser.close()
    except ET.ParseError as e:
        sys.stderr.write("Error while parsing input file\n")
        # sys.stderr is a text stream: write str, not encoded bytes
        # (bytes + str would raise TypeError and hide the parse error).
        sys.stderr.write(str(e) + '\n')
        sys.exit(15)
评论列表
文章目录