def getlinkinfos(self):
    """Return a list of (link, rawlink, fragment) tuples for this page.

    For every raw link reported by ``self.parser.getlinks()``:

    * ``fragment`` is the fragment component of the raw link,
    * ``rawlink`` is the raw link rebuilt with an empty fragment,
    * ``link`` is that fragment-less raw link joined onto the base URL.

    The base URL is ``self.url`` joined with ``self.parser.getbase()``
    (or with "" when getbase() returns a false value).

    An empty list is returned when ``self.parser`` is false.
    """
    # The parser is expected to have been stored by __init__() after a
    # successful parse; a false value means there is nothing to report.
    page_parser = self.parser
    if not page_parser:
        return []

    hrefs = page_parser.getlinks()
    base_url = urlparse.urljoin(self.url, page_parser.getbase() or "")

    def describe(href):
        # The fragment is NOT thrown away: it is split off here and
        # handed back as the third tuple member (see Checker.dopage()).
        scheme, netloc, path, params, query, fragment = \
            urlparse.urlparse(href)
        bare = urlparse.urlunparse(
            (scheme, netloc, path, params, query, ''))
        return (urlparse.urljoin(base_url, bare), bare, fragment)

    return [describe(href) for href in hrefs]
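# NOTE(review): stray non-code text, apparently web page navigation
# labels: "Comment list" / "Article table of contents".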