def get_description_urls(html):
    """Collect the anchor URLs in *html* that carry a URL scheme.

    The markup is fed through ``htmllib.HTMLParser``, which records the
    ``href`` of each anchor it encounters in ``anchorlist``.  Only hrefs
    whose parsed scheme component is non-empty are kept (so relative links
    are dropped), and each kept URL is passed through ``xmlescape`` (a
    helper defined outside this function) before being returned.

    Returns a list of the escaped URLs in document order.  If the parser
    raises ``sgmllib.SGMLParseError`` the result is an empty list; anchors
    seen before the error are not returned.
    """
    import sgmllib
    import urlparse
    from formatter import NullFormatter
    from htmllib import HTMLParser

    try:
        # NullFormatter discards all rendered output; only the parser's
        # side-channel list of anchors is of interest here.
        html_parser = HTMLParser(NullFormatter())
        html_parser.feed(html)
        html_parser.close()
    except sgmllib.SGMLParseError:
        return []
    else:
        # Index 0 of the urlparse() result is the scheme component.
        return [
            xmlescape(href)
            for href in html_parser.anchorlist
            if urlparse.urlparse(href)[0]
        ]
评论列表
文章目录