def load_xml(self, xmldir):
    '''
    Load the XML documents found in ``xmldir`` into ``self.doclist``.

    For KDD/WWW/UMD only.

    Every regular file in the directory is read, stripped of characters that
    are not in ``string.printable`` and parsed as XML. The text of the first
    ``title`` and ``abstract`` children of the root, plus the text of every
    element matching ``*/tag``, is stored on a new ``Document`` which is then
    appended to ``self.doclist``. Files that cannot be decoded are reported
    on stdout and skipped.

    :param xmldir: directory holding the XML files (a trailing path
                   separator is optional)
    :return: None; documents are appended to ``self.doclist`` in place
    :raises IndexError: if a file has no ``title`` or ``abstract`` element
    '''
    import string

    # Built once for the whole directory instead of once per file.
    printable = set(string.printable)

    for filename in os.listdir(xmldir):
        path = os.path.join(xmldir, filename)
        # Sub-directories cannot be opened as documents; skip them rather
        # than aborting the whole load.
        if not os.path.isfile(path):
            continue

        with open(path) as textfile:
            doc = Document()
            # str.find returns -1 when '.xml' is absent, which would silently
            # chop the last character off the name; keep the full name then.
            ext_pos = filename.find('.xml')
            doc.name = filename[:ext_pos] if ext_pos != -1 else filename
            try:
                lines = textfile.readlines()
                # Join the kept characters explicitly: on Python 3 ``filter``
                # returns an iterator, not a string, so it cannot be joined.
                xml = ''.join(
                    ''.join(ch for ch in line if ch in printable)
                    for line in lines
                )
                root = ET.fromstring(xml)
                doc.title = root.findall("title")[0].text
                doc.abstract = root.findall("abstract")[0].text
                doc.phrases = [n.text for n in root.findall("*/tag")]
                self.doclist.append(doc)
            except UnicodeDecodeError:
                print('UnicodeDecodeError detected! %s' % filename )
# keyphrase_test_dataset.py -- file source code
# python
# Views: 29
# Favorites: 0
# Likes: 0
# Comments: 0
# Comment list
# Table of contents