def parse_tag(self, response):
    """Parse a listing page and schedule follow-up requests.

    Three kinds of requests are yielded, in this order:

    1. One request per link matching ``.*/user/.*`` on ``www.reddit.com``
       (handled by ``self.parse_user``), skipping links whose anchor text
       is exactly ``'Click here!'``.
    2. One request per link matching ``.*/comments/.*`` on
       ``www.reddit.com`` (handled by ``self.parse_comment``).
    3. One request for the "next" pagination button, if present
       (handled by this method again).

    Every request carries ``meta={'cookies': True, 'path': ...}`` where
    ``path`` is the link with the ``https://www.reddit.com`` prefix text
    removed.
    NOTE(review): how ``meta['cookies']`` / ``meta['path']`` are consumed
    is not visible here -- presumably by a project middleware; confirm.

    Args:
        response: The Scrapy response for the listing page.

    Yields:
        ``Request`` objects for user pages, comment pages and the next
        listing page.
    """
    site_root = 'https://www.reddit.com'

    user_links = LinkExtractor(
        allow=('.*/user/.*'), allow_domains='www.reddit.com').extract_links(response)
    for link in user_links:
        # Links with this exact anchor text are not followed.
        if link.text == 'Click here!':
            continue
        yield Request(
            url=link.url,
            callback=self.parse_user,
            meta={'cookies': True, 'path': link.url.replace(site_root, '')},
        )

    comment_links = LinkExtractor(
        allow=('.*/comments/.*'), allow_domains='www.reddit.com').extract_links(response)
    for link in comment_links:
        yield Request(
            url=link.url,
            callback=self.parse_comment,
            meta={'cookies': True, 'path': link.url.replace(site_root, '')},
        )

    next_page = response.css(
        '#siteTable > div.nav-buttons > span > span.next-button > a::attr(href)').extract_first()
    if not next_page:
        self.logger.info('No next page in parse_tag')
        return

    # The href may be relative; Request() raises ValueError on a URL with
    # no scheme, so resolve it against the current page first. Absolute
    # URLs pass through urljoin unchanged.
    yield Request(
        url=response.urljoin(next_page),
        callback=self.parse_tag,
        meta={'cookies': True, 'path': next_page.replace(site_root, '')},
    )
评论列表
文章目录