def xml_to_data(xmlNode):
    """Flatten the children of a DOM node into a single string.

    Each child of ``xmlNode`` contributes one piece of text:

    * element children are flattened recursively with ``xml_to_data``;
    * text children contribute ``sanitize_data(node.data)``.

    Pieces are concatenated in document order.  A single space is inserted
    before every piece except the first, unless the piece tests as a member
    of the module-level ``punctuation`` collection, in which case it is
    appended directly to the preceding text.

    NOTE(review): ``sanitize_data`` and ``punctuation`` are defined outside
    this block; this code assumes ``sanitize_data`` returns a string --
    confirm against its definition.

    Args:
        xmlNode: A DOM node exposing ``childNodes``.

    Returns:
        The flattened text, or ``''`` when ``xmlNode`` has no children.

    Raises:
        AssertionError: If a child is neither an element nor a text node
            (for example a comment or processing instruction).
    """
    pieces = []
    for node in xmlNode.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            piece = xml_to_data(node)
        elif node.nodeType == Node.TEXT_NODE:
            piece = sanitize_data(node.data)
        else:
            # Raised explicitly rather than via ``assert 0``: assert
            # statements are removed under ``python -O``, which would let an
            # unsupported node fall through with no (or a stale) piece.
            raise AssertionError(
                'unsupported DOM node type: %r' % (node.nodeType,)
            )

        # The first piece never gets a separator; later pieces get a space
        # unless they are punctuation that should hug the preceding text.
        if pieces and piece not in punctuation:
            pieces.append(' ')
        pieces.append(piece)
    return ''.join(pieces)
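# 评论列表 (comment list) -- web-page navigation residue, not part of the code
# 文章目录 (article table of contents) -- web-page navigation residue, not part of the code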