def parse_sitemap(content):
    """Extract the ``<url>`` entries from a sitemap document.

    This is a lightweight regex-based scan, not a full XML parser: tag
    names are used verbatim as keys (including any namespace prefix or
    attributes) and values are returned exactly as they appear in the
    document, without stripping or entity unescaping.

    Args:
        content: The sitemap document, as text or as UTF-8 encoded bytes.

    Returns:
        A list with one dict per ``<url>`` element, mapping each child tag
        name to its text (e.g. ``{'loc': ..., 'lastmod': ...}``), or
        ``None`` when the document contains no ``<urlset>`` element.
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # anything that is not already text gets decoded as UTF-8.
    text_type = type(u'')
    if not isinstance(content, text_type):
        content = content.decode('utf-8')

    urlset_match = re.search(
        r'<urlset[^>]*>(?P<urls>[\s\S]*)</urlset>', content
    )
    if not urlset_match:
        return None

    urlset_content = urlset_match.group('urls')
    # The quantifier must be lazy: a greedy match would run from the first
    # <url> to the last </url> and merge every entry into a single dict.
    return [
        dict(re.findall(r'<([^>]+)>([^<]*)</[^>]+>', url_content))
        for url_content in re.findall(
            r'<url>([\s\S]*?)</url>', urlset_content
        )
    ]
评论列表
文章目录