def depaginate(self, response, data_key=None):
    """Collect the data from a response and from every page that follows it.

    Data is pulled out of ``response`` with
    ``self.extract_data_from_response``. If ``self.has_pagination_links``
    reports pagination links, each ``'next'`` link is fetched through
    ``self.session.get`` and its data is collected too, until a page no
    longer advertises a ``'next'`` link.

    Args:
        response: The first page's response object. Only ``response.url`` is
            read here directly; everything else is delegated to the helper
            methods on ``self``. (Presumably a requests-style response --
            confirm against the callers.)
        data_key: Passed through unchanged to
            ``self.extract_data_from_response`` for every page.

    Returns:
        A flat list. A page whose extracted data is a list contributes its
        items; any other non-``None`` value is appended as a single item;
        pages that yield ``None`` contribute nothing.
    """
    logging.debug('Attempting to depaginate response from {}'.format(response.url))
    all_data = []

    def collect(page):
        # Fold one page's payload into the running result.
        page_data = self.extract_data_from_response(page, data_key=data_key)
        if page_data is None:
            return
        if isinstance(page_data, list):
            all_data.extend(page_data)
        else:
            all_data.append(page_data)

    collect(response)

    if not self.has_pagination_links(response):
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and does not raise a DeprecationWarning.
        logging.warning('Response from {} has no pagination links.'.format(response.url))
        return all_data

    pagination_links = self.extract_pagination_links(response)
    while 'next' in pagination_links:
        response = self.session.get(pagination_links['next'])
        # Read the links of the freshly fetched page before collecting its
        # data, matching the original call order.
        pagination_links = self.extract_pagination_links(response)
        collect(response)
    return all_data
评论列表
文章目录