def get_tree(page):
    """Parse an HTML page into an element tree with ``html.fromstring``.

    Non-breaking spaces are normalized to plain spaces before parsing,
    because XPath ``starts-with`` tests written with a regular space do not
    match text that contains U+00A0 (which is what ``&nbsp;`` becomes once
    the markup is parsed).

    :param page: HTML markup as a string.
    :return: the element returned by ``html.fromstring``, or ``None`` when
        the parser rejects the markup.
    """
    # The entity has to be spelled out: replacing " " with " " is a no-op
    # and leaves every &nbsp; in place for the parser to expand.
    page = page.replace("&nbsp;", " ")
    if not isinstance(page, bytes):
        # Only touch the raw character on text input; combining a non-ASCII
        # unicode literal with a Python 2 byte string would raise.
        page = page.replace(u"\u00a0", u" ")

    try:
        return html.fromstring(page)
    except (etree.XMLSyntaxError, etree.ParserError) as e:
        # Best effort: report the failure and let the caller deal with None.
        print(u"not parsing, because etree error in get_tree: {}".format(e))
        return None
评论列表
文章目录