def parse(self, response):
    """Parse an article-list page and schedule follow-up requests.

    1. Extract every article link on the list page and yield a request for
       it; the downloaded article page is handled by ``self.parse_detail``.
    2. Extract the next-page link and yield a request for it; the downloaded
       list page is handled by this method again.

    :param response: the downloaded list-page response.
    :return: a generator of ``Request`` objects.
    """
    # Anchors inside the thumbnail containers; each is expected to hold the
    # article's cover <img> and to link to the article page.
    post_nodes = response.css("#archive .floated-thumb .post-thumb a")
    for post_node in post_nodes:
        # Cover image URL shown on the list page ("" when there is no <img src>).
        image_url = post_node.css("img::attr(src)").extract_first("")
        post_url = post_node.css("::attr(href)").extract_first("")
        if not post_url:
            # Without an href, urljoin() would return response.url itself and
            # the list page would be scheduled for parse_detail.
            continue

        # hrefs may be relative, so resolve them against the current page URL.
        # Absolute URLs pass through urljoin() unchanged. `parse` here is the
        # module-level name (presumably urllib.parse), not this method.
        # The cover image URL travels in `meta` so that parse_detail can read
        # it from response.meta.
        # NOTE(review): when image_url is "", front_image_url becomes
        # response.url - confirm parse_detail copes with that.
        yield Request(
            url=parse.urljoin(response.url, post_url),
            meta={"front_image_url": parse.urljoin(response.url, image_url)},
            callback=self.parse_detail,
        )

    # Follow pagination: hand the next list page back to this callback.
    next_url = response.css(".next.page-numbers::attr(href)").extract_first("")
    if next_url:
        # Resolve against response.url like the article links above; a
        # relative href would otherwise be rejected by Request (no scheme).
        yield Request(url=parse.urljoin(response.url, next_url), callback=self.parse)
评论列表
文章目录