def getpage(self, url_pair):
    """Return the page object for a (URL, fragment) pair, or None.

    The incoming argument is a (URL, fragment) pair; only the URL part
    is used for the decisions made here, but the whole pair is handed to
    openpage() and readhtml() so they can report and record errors
    against the pair.

    Outcomes, in the order they are tried:

    * If the URL is a key of self.name_table, the stored value is
      returned unchanged.
    * mailto, news, javascript and telnet URLs are noted and skipped
      (None is returned).
    * If self.inroots(url) is false (presumably: the link is external
      to the roots being checked -- confirm against inroots), the link
      is only opened and closed again when self.checkext is true; no
      page is ever returned for it.
    * Otherwise the page is read with readhtml().  If the URL that comes
      back differs from the one requested, the redirect is noted and the
      new URL is used.  A Page is returned when text is non-empty.

    In every other case None is returned.
    """
    # Unpacking (rather than indexing) keeps the requirement that
    # url_pair is exactly a two-item pair; the fragment is not used here.
    url, _fragment = url_pair

    # The page may have been cached in the name_table variable.
    # "in" replaces the deprecated dict.has_key().
    if url in self.name_table:
        return self.name_table[url]

    scheme, _path = urllib.splittype(url)
    if scheme in ('mailto', 'news', 'javascript', 'telnet'):
        self.note(1, " Not checking %s URL" % scheme)
        return None
    isint = self.inroots(url)

    # Ensure that openpage gets the URL pair to
    # print out its error message and record the error pair
    # correctly.
    if not isint:
        if not self.checkext:
            self.note(1, " Not checking ext link")
            return None
        # The link is only opened and closed again; its content is
        # never read.
        f = self.openpage(url_pair)
        if f:
            self.safeclose(f)
        return None

    text, nurl = self.readhtml(url_pair)
    if nurl != url:
        self.note(1, " Redirected to %s", nurl)
        url = nurl
    if text:
        return Page(text, url, maxpage=self.maxpage, checker=self)
    # No text came back: there is nothing to build a Page from.
    return None
# These next three functions take (URL, fragment) pairs as
# arguments, so that openpage() receives the appropriate tuple to
# record error messages.
评论列表
文章目录