def parse_question(self, response):
    """Parse one page of a user's question list and follow pagination.

    Yields one ``QuestionItem`` for every ``div.List-item`` element on the
    page.  If the page shows a "next page" pagination button, a ``Request``
    for the following page is yielded afterwards, handled by this same
    callback.

    Args:
        response: The downloaded list page.  ``response.meta`` must carry
            ``ask_user_id`` (copied onto every item) and ``page`` (an int;
            ``page + 1`` is used as the page number of the next request).

    Yields:
        ``QuestionItem`` objects with ``ask_user_id``, ``title``,
        ``question_id``, ``ask_time``, ``answer_count`` and
        ``followees_count`` set, then at most one ``Request``.

    Entries that lack a title link or have fewer than three status spans
    are skipped with a warning instead of aborting the whole page.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)

    for entry in response.xpath('//div[@class="List-item"]'):
        title = entry.xpath('.//div[@class="QuestionItem-title"]')
        link_texts = title.xpath('./a/text()').extract()
        link_hrefs = title.xpath('./a/@href').extract()
        status = entry.xpath(
            './/div[@class="ContentItem-status"]//span/text()'
        ).extract()

        # A single malformed entry must not lose the remaining entries
        # or the pagination request below.
        if not link_texts or not link_hrefs or len(status) < 3:
            log.warning(
                'Skipping malformed question entry on %s', response.url
            )
            continue

        item = QuestionItem()
        item['ask_user_id'] = response.meta['ask_user_id']
        item['title'] = link_texts[0]
        item['question_id'] = link_hrefs[0].replace('/question/', '')
        # The status spans are read positionally: time, answers, followers.
        item['ask_time'] = status[0]
        item['answer_count'] = status[1]
        item['followees_count'] = status[2]
        yield item

    next_button = response.xpath(
        '//button[@class="Button PaginationButton '
        'PaginationButton-next Button--plain"]/text()'
    ).extract()
    if not next_button:
        return

    # Compute the next page number locally rather than mutating the meta
    # dict of the response that is currently being processed.
    next_page = response.meta['page'] + 1

    # Keep everything up to and including the last "page=" and append the
    # new page number (raw string: "\d" is not a valid str escape).
    match = re.search(r'(.*page=)\d+', response.url)
    if match is None:
        log.warning(
            'Cannot build next-page URL, no "page=<n>" in %s', response.url
        )
        return

    yield Request(
        match.group(1) + str(next_page),
        callback=self.parse_question,
        meta={
            'ask_user_id': response.meta['ask_user_id'],
            'page': next_page,
        },
    )
评论列表
文章目录