def parse(self, response, crawler):
    """Yield story titles from a listing page, then schedule the next page.

    Parses ``response.text`` as HTML and yields the ``text`` of every
    element matching ``tr.athing a.storylink``. Afterwards it looks up the
    first ``a.morelink`` element, resolves its ``href`` against the scheme
    and host of ``response.url``, and hands the result to
    ``crawler.schedule_request``.

    Because this is a generator, the follow-up request is scheduled only
    once the caller has consumed every yielded title.

    Args:
        response: Object exposing ``text`` (the HTML body) and ``url``
            (the address the page was fetched from).
        crawler: Object exposing ``schedule_request(url)``.

    Yields:
        The ``text`` attribute of each matched story link.
    """
    document = lxml.etree.HTML(response.text)
    for title in document.cssselect('tr.athing a.storylink'):
        yield title.text

    # Only an empty match list is an expected failure here; catching just
    # IndexError keeps KeyboardInterrupt/SystemExit and real bugs visible.
    try:
        more_link = document.cssselect('a.morelink')[0]
    except IndexError:
        # No "more" link on the page (presumably the last page).
        return

    href = more_link.get('href')
    if not href:
        # An anchor without a usable href would make urljoin() return the
        # bare site root, scheduling a bogus "next page" request.
        return

    # Relative links are resolved against the site root (scheme + host),
    # not against the full path of the current page.
    urlinfo = urllib.parse.urlparse(response.url)
    base_url = urlinfo.scheme + '://' + urlinfo.netloc
    next_url = urllib.parse.urljoin(base_url, href)
    crawler.schedule_request(next_url)
评论列表
文章目录