def dopage(self, url_pair):
    """Check a single page and queue the links found on it.

    url_pair is a (url, local_fragment) tuple.  The steps are:

    1. When self.verbose is above 1, report which URL is being checked
       (above 2, also show the entry stored for it in self.todo).
    2. If the pair carries a fragment and self.nonames is set, mark the
       pair done and return without fetching anything.
    3. Fetch the page with self.getpage().  A parse error is reported
       through self.note() and the page is then treated as absent.
    4. If a page was obtained, store it in self.name_table under the
       URL, call self.setbad() when the requested fragment is not among
       page.getnames(), and hand every link from page.getlinkinfos()
       to self.newlink().  Otherwise store None in self.name_table.
    5. Mark the pair done with self.markdone().

    Returns None.
    """
    # All printing of URLs uses format_url(); argument changed to
    # url_pair for clarity.
    if self.verbose > 1:
        if self.verbose > 2:
            self.show("Check ", self.format_url(url_pair),
                      " from", self.todo[url_pair])
        else:
            self.message("Check %s", self.format_url(url_pair))

    url, local_fragment = url_pair

    # Fragment checking is switched off: nothing to verify for this pair.
    if local_fragment and self.nonames:
        self.markdone(url_pair)
        return

    try:
        page = self.getpage(url_pair)
    except sgmllib.SGMLParseError as err:
        # "except ... as" is accepted by Python 2.6+ as well as Python 3;
        # the sanitized text gets its own name instead of rebinding the
        # exception variable.
        msg = self.sanitize(err)
        self.note(0, "Error parsing %s: %s",
                  self.format_url(url_pair), msg)
        # Don't actually mark the URL as bad - it exists, we just
        # can't parse it!
        page = None

    if page:
        # Store the page which corresponds to this URL.
        self.name_table[url] = page
        # If there is a fragment in this url_pair, and it's not
        # in the list of names for the page, call setbad(), since
        # it's a missing anchor.
        if local_fragment and local_fragment not in page.getnames():
            self.setbad(url_pair, ("Missing name anchor `%s'" % local_fragment))
        for info in page.getlinkinfos():
            # getlinkinfos() returns the fragment as well, and it is
            # passed on to newlink() as part of the (link, fragment) pair.
            link, rawlink, fragment = info
            # However, we don't want the fragment as the origin, since
            # the origin is logically a page.
            origin = url, rawlink
            self.newlink((link, fragment), origin)
    else:
        # If no page has been created yet, we want to
        # record that fact.
        self.name_table[url] = None

    # Runs on both the "page" and "no page" paths.
    self.markdone(url_pair)
评论列表
文章目录