def dmoz_reader(filename):
    """Stream (url, topics) pairs from an RDF/XML dump.

    The document is parsed incrementally with ``pulldom``; only one
    ``ExternalPage`` element is expanded into a DOM subtree at a time.
    (Presumably the input is a DMOZ content dump -- inferred from the
    function name; confirm against callers.)

    Args:
        filename: Path of the XML file, or an already-open binary stream
            (anything ``pulldom.parse`` accepts). A file opened here from a
            ``str`` path is closed when the generator finishes or is
            closed; a caller-supplied stream is left open.

    Yields:
        Tuples ``(url, topics)`` where ``url`` is the value of the
        element's ``about`` attribute and ``topics`` is the full text of
        its first ``topic`` descendant (``''`` when that element is empty).

    Raises:
        KeyError: An ``ExternalPage`` element has no ``about`` attribute.
        IndexError: An ``ExternalPage`` element has no ``topic`` descendant.
    """
    # Open path arguments ourselves so the handle can be closed
    # deterministically; pulldom.parse() would open it but never close it.
    owns_stream = isinstance(filename, str)
    stream = open(filename, 'rb') if owns_stream else filename
    try:
        doc = pulldom.parse(stream)
        for event, node in doc:
            if event != pulldom.START_ELEMENT or node.tagName != 'ExternalPage':
                continue
            # Pull the element's children into a DOM subtree under `node`.
            doc.expandNode(node)
            url = node.attributes['about'].value
            topic_node = node.getElementsByTagName('topic')[0]
            # pulldom creates one text node per SAX characters() call, and
            # the parser may split text at entity references or read-buffer
            # boundaries, so join every text child instead of taking only
            # the first one.
            topics = ''.join(
                child.data
                for child in topic_node.childNodes
                if child.nodeType == child.TEXT_NODE
            )
            yield url, topics
    finally:
        if owns_stream:
            stream.close()
评论列表
文章目录