def get_page_count(self, response):
    """Yield one listing request per result page of a Douban movie tag.

    The number of pages is read from the link labels inside the
    ``div.paginator`` element of ``response``. For every page a ``Request``
    to ``https://movie.douban.com/tag/<tag>`` is yielded, with the ``start``
    query parameter advancing in steps of 20.

    Args:
        response: Response of a tag listing page. ``response.meta`` is read
            for the ``'tag'`` key (``None`` is used when it is absent).

    Yields:
        Request: one per page, handled by ``self.get_page`` on success and
        ``self.error_parse`` on failure. Its meta carries the tag, the
        1-based page number and a 20-second download timeout.
    """
    labels = response.xpath('//div[@class="paginator"]/a/text()').extract()

    # Keep only purely numeric labels so a stray non-numeric link text
    # cannot make int() raise ValueError.
    numeric_labels = [
        label.strip() for label in labels if label.strip().isdecimal()
    ]

    # The last numeric link is taken as the total page count. When the
    # paginator has no numeric links (presumably a listing that fits on a
    # single page -- confirm against the site), crawl that one page instead
    # of failing with IndexError on an empty list.
    page_count = int(numeric_labels[-1]) if numeric_labels else 1

    tag = response.meta.get('tag')
    for i in range(page_count):
        url = 'https://movie.douban.com/tag/%s?start=%s&type=T' % (tag, i * 20)
        yield Request(
            url=url,
            headers=self.headers,
            # Bypass the duplicate filter so every page URL is fetched.
            dont_filter=True,
            meta={
                'tag': tag,
                'page': i + 1,
                'download_timeout': 20,
            },
            callback=self.get_page,
            errback=self.error_parse,
        )
评论列表
文章目录