def link_parse(self, response):
    """Yield follow-up requests for a listing page.

    Every href matched by the XPath below is prefixed with ``DOMAIN`` and
    scheduled with ``self.parse_content`` as its callback.

    If the response URL contains exactly one ``page_`` marker, the text
    after it (with ``.html`` removed) is read as a page number. When that
    number lies strictly between 1 and the ``SPIDER_DEEP`` project setting,
    the response URL is scheduled again with ``self.link_parse`` as its
    callback.

    NOTE(review): the original paste had lost its indentation; the nesting
    here (pagination check at function level, independent of whether any
    links were found) is reconstructed -- confirm against the real file.
    """
    deeps = get_project_settings()['SPIDER_DEEP']

    # Links to the individual content pages listed on this response.
    links = response.xpath("//li[@class='pbox clr']/div[@class='word']/a/@href").extract()
    for link in links:
        yield Request(DOMAIN + link, callback=self.parse_content)

    page_url = response.url
    parts = page_url.split("page_")
    # Exactly two parts means the URL holds a single "page_" marker.
    if len(parts) != 2:
        return

    try:
        page_index = int(parts[1].replace('.html', ''))
    except ValueError:
        # The suffix is not a plain page number (e.g. it carries a query
        # string), so there is no page index to compare against the limit.
        return

    if 1 < page_index < deeps:
        # NOTE(review): this re-requests the URL that was just parsed, not
        # the following page -- presumably the next page was intended.
        # Behaviour is kept as in the original; verify and fix upstream.
        yield Request(page_url, callback=self.link_parse)
# (web page residue) Comment list
# (web page residue) Article contents