def parse(self, response):
    """Schedule solution-list requests for this page and follow pagination.

    For every link titled "Participants solved the problem", a request for
    the solution list (ordered by consumed time, ascending) is yielded with
    ``self.parse_problem_solution_list_page`` as callback. Afterwards the
    next listing page, if any, is requested with this method as callback.

    Args:
        response: The response for a listing page; must provide
            ``selector.xpath`` and ``urljoin``.

    Yields:
        ``scrapy.Request`` objects for solution lists and for the next page.
    """
    solution_hrefs = response.selector.xpath(
        '//a[@title="Participants solved the problem"]/@href').extract()
    for solution_href in solution_hrefs:
        solution_url = response.urljoin(
            solution_href + '?order=BY_CONSUMED_TIME_ASC')
        yield scrapy.Request(
            solution_url, callback=self.parse_problem_solution_list_page)

    # NOTE(review): the "inactive" span is presumably the disabled
    # pagination arrow -- confirm against the page markup.
    inactive_labels = response.selector.xpath(
        '//span[@class="inactive"]/text()').extract()

    # An inactive right arrow (U+2192) means there is no further page.
    if inactive_labels and inactive_labels[0] == u'\u2192':
        return

    arrow_hrefs = response.selector.xpath(
        '//div[@class="pagination"]/ul/li/a[@class="arrow"]/@href').extract()

    # When some other arrow is inactive, the first active arrow link is used
    # as "next"; when no arrow is inactive, the second arrow link is used.
    next_index = 0 if inactive_labels else 1

    # Pages without pagination (or with fewer arrow links than expected)
    # simply have no next page; do not fail with IndexError.
    if len(arrow_hrefs) <= next_index:
        return

    next_page_url = response.urljoin(arrow_hrefs[next_index])
    yield scrapy.Request(next_page_url, callback=self.parse)
评论列表
文章目录