user.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:Spider 作者: poluo 项目源码 文件源码
def parse_tag(self, response):
        """Parse a listing page and schedule user, comment and next-page requests.

        Three kinds of requests are yielded, in this order:

        * one per link matching ``.*/user/.*`` on ``www.reddit.com`` (links
          whose text is ``'Click here!'`` are skipped), handled by
          ``self.parse_user``;
        * one per link matching ``.*/comments/.*`` on ``www.reddit.com``,
          handled by ``self.parse_comment``;
        * one for the "next" pagination button, if present, handled by this
          method again.

        Every request carries ``meta={'cookies': True, 'path': ...}`` where
        ``path`` is the request URL with the ``https://www.reddit.com`` prefix
        removed.
        NOTE(review): the ``cookies``/``path`` meta keys are presumably read by
        a middleware or pipeline elsewhere in the project -- confirm.

        :param response: the downloaded listing page.
        :return: a generator of ``Request`` objects.
        """
        user_links = LinkExtractor(allow=('.*/user/.*'), allow_domains='www.reddit.com').extract_links(response)
        for link in user_links:
            # Links labelled 'Click here!' are deliberately not followed.
            if link.text != 'Click here!':
                path = link.url.replace('https://www.reddit.com', '')
                yield Request(url=link.url, callback=self.parse_user, meta={'cookies': True, 'path': path})

        comment_links = LinkExtractor(allow=('.*/comments/.*'), allow_domains='www.reddit.com').extract_links(response)
        for link in comment_links:
            path = link.url.replace('https://www.reddit.com', '')
            yield Request(url=link.url, callback=self.parse_comment, meta={'cookies': True, 'path': path})

        next_page = response.css(
            '#siteTable > div.nav-buttons > span > span.next-button > a::attr(href)').extract_first()
        if next_page:
            # The href may be relative; Request raises ValueError for a URL
            # without a scheme, so resolve it against the page URL first.
            # Absolute hrefs pass through urljoin unchanged.
            next_url = response.urljoin(next_page)
            path = next_url.replace('https://www.reddit.com', '')
            yield Request(url=next_url, callback=self.parse_tag, meta={'cookies': True, 'path': path})
        else:
            self.logger.info('No next page in parse_tag')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号