SolutionSpider.py 文件源码-python代码片段

SolutionSpider.py 文件源码

python

阅读 23 收藏 0 点赞 0 评论 0

项目：codeforces-crawler 作者: Nymphet 项目源码文件源码

def parse(self, response):
        for solution_href in response.selector.xpath('//a[@title="Participants solved the problem"]/@href'):
            solution_url = response.urljoin(
                solution_href.extract() + '?order=BY_CONSUMED_TIME_ASC')
            yield scrapy.Request(solution_url, callback=self.parse_problem_solution_list_page)

        if response.selector.xpath('//span[@class="inactive"]/text()').extract():
            if response.selector.xpath('//span[@class="inactive"]/text()')[0].extract() != u'\u2192':
                next_page_href = response.selector.xpath(
                    '//div[@class="pagination"]/ul/li/a[@class="arrow"]/@href')[0]
                next_page_url = response.urljoin(next_page_href.extract())
                yield scrapy.Request(next_page_url, callback=self.parse)
        else:
            next_page_href = response.selector.xpath(
                '//div[@class="pagination"]/ul/li/a[@class="arrow"]/@href')[1]
            next_page_url = response.urljoin(next_page_href.extract())
            yield scrapy.Request(next_page_url, callback=self.parse)