def link_parse(self, response):
    """Yield follow-up requests for a listing page.

    Article hrefs found on the page are prefixed with ``DOMAIN`` and
    requested with ``self.parse_content`` as callback. When the page has no
    article links, the result of ``self.parse_content(response)`` is yielded
    instead. Pagination hrefs are then requested with ``self.link_parse`` as
    callback, but only when their ``pg=`` number is greater than the current
    page's number and lower than the ``SPIDER_DEEP`` project setting.

    :param response: response object; its ``xpath()`` and ``url`` are read.
    :return: generator of the objects described above.
    """
    def page_number(url, default=None):
        # Return the integer that follows the single "pg=" marker in *url*.
        # Fall back to *default* when the marker is absent, repeated, or not
        # followed by a plain integer, so one malformed href cannot abort
        # the whole generator with a ValueError.
        pieces = url.split("pg=")
        if len(pieces) != 2:
            return default
        try:
            return int(pieces[-1])
        except ValueError:
            return default

    # NOTE(review): assumes SPIDER_DEEP is configured as an integer -- confirm.
    deeps = get_project_settings()['SPIDER_DEEP']

    # Article links on the listing page.
    links = response.xpath("//li[@class='itm itm_new']/a/@href").extract()
    if not links:
        # NOTE(review): this yields whatever parse_content returns as a
        # single object; if parse_content is itself a generator its results
        # would need to be iterated instead -- confirm.
        yield self.parse_content(response)
    else:
        for link_item in links:
            yield Request(DOMAIN + link_item, callback=self.parse_content)

    # Pagination links.
    link_page = response.xpath("//li[@class='itm itm_new']/span/a/@href").extract()
    # Single-argument print calls produce the same output under the Python 2
    # print statement and the Python 3 print function.
    print("link_page: %s" % (link_page,))

    # The current page number depends only on the response, so it is computed
    # once; a URL without a usable "pg=" value counts as page 1.
    this_index = page_number(response.url, default=1)
    for page_item in link_page:
        page_id = page_number(page_item)
        # Only move forward through the pages, and stop below the limit.
        if page_id is not None and this_index < page_id < deeps:
            print(page_item)
            # NOTE(review): unlike article hrefs, page hrefs are requested
            # without the DOMAIN prefix; presumably they are absolute --
            # confirm.
            yield Request(page_item, callback=self.link_parse)
# ?????????
# Comment list
# Article table of contents