def parse(self, response):
    """Parse an article listing page and follow its pagination.

    Yields one ``Request`` per article link matched on the page (each
    handled by ``self.parse_detail``), then, if the page has a pager link
    with a non-empty ``href``, a ``Request`` for that page handled by this
    method again.

    :param response: listing-page response; must expose ``css()`` and ``url``.
    """
    article_links = response.css('#block-content-article .mainer .item a.title')
    for link in article_links:
        # hrefs may be relative, so resolve them against the current page URL.
        href = link.css("::attr(href)").extract_first("")
        yield Request(
            url=urlparse.urljoin(response.url, href),
            callback=self.parse_detail,
            dont_filter=True,
        )

    pager_nodes = response.css(".pager")
    if not pager_nodes:
        # No pager on this page: indexing the empty result would raise
        # IndexError, so stop here instead.
        return

    # The last ".pager" element is the one treated as the next-page link.
    next_href = pager_nodes[-1].css("::attr(href)").extract_first("")
    if next_href:
        # NOTE(review): dont_filter=True bypasses the scheduler's duplicate
        # filter; confirm the final page's pager link cannot point back to an
        # already-visited page, or the crawl may loop.
        yield Request(
            url=urlparse.urljoin(response.url, next_href),
            callback=self.parse,
            dont_filter=True,
        )
评论列表
文章目录