def get_pub_dic_xml(file_name='data/proton-beam-all.xml'):
    """Build a mapping from record number to that record's text features.

    Parses the XML file at ``file_name`` and iterates over the children of
    the first child of the document root. For every such record the title,
    the abstract and all keywords are concatenated into one string.

    Args:
        file_name: Path of the XML file to parse.

    Returns:
        A dict mapping the integer record number (from ``rec-number``) to a
        string made of the title, the abstract, and then each keyword
        followed by a single space. Records without a ``keywords`` element,
        and keyword elements without text, contribute no keyword text.

    Raises:
        ValueError: If the text extracted for ``rec-number`` cannot be
            converted by ``int``.
    """
    tree = ET.parse(file_name)
    # The records are the children of the root's first child.
    root = tree.getroot()[0]

    # Create dic of : id -> text features
    pub_dic = {}
    for pub in root:
        # get_text is a helper defined elsewhere in this file.
        rec_number = int(get_text(pub.find('rec-number')))
        abstract = get_text(pub.find('abstract'))
        title = get_text(pub.find('titles')[0])

        # NOTE(review): title and abstract are joined without a separator,
        # exactly as before; confirm whether get_text already yields trailing
        # whitespace, otherwise the last title word and the first abstract
        # word run together.
        parts = [title, abstract]

        # find() returns None when the record has no <keywords> element, and
        # an element's .text is None when it carries no direct text; both
        # cases used to raise TypeError and are now simply skipped.
        keywords = pub.find('keywords')
        if keywords is not None:
            parts.extend(kw.text + ' ' for kw in keywords
                         if kw.text is not None)

        pub_dic[rec_number] = ''.join(parts)
    return pub_dic
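# Comment list  (appears to be leftover web-page navigation text, not code)
# Article table of contents  (appears to be leftover web-page navigation text, not code)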