def relations(self, response):
    """Collect the followees listed on one page of a user's relations view.

    The page at ``response.url`` is loaded through ``self.obj`` and the
    ``href`` of every ``<a class="UserLink-link">`` element is reduced to
    the part that follows ``https://www.zhihu.com/people/``. The
    de-duplicated identifiers (first-seen order) are appended to
    ``response.meta['item']['relations_id']``.

    Args:
        response: Response whose ``meta`` carries ``'item'`` (a mapping
            holding a list under ``'relations_id'``) and, when a next page
            exists, ``'page'`` (the current page number, an int).

    Yields:
        First, either a ``Request`` for the next page (callback
        ``self.relations``, with ``page`` incremented and the same item in
        ``meta``) when the pagination "next" button is present in
        ``response``, or the accumulated ``response.meta['item']`` when it
        is not. Then one ``Request`` per followee found on this page,
        pointing at that user's ``/answers`` URL with callback
        ``self.parse``.
    """
    profile_prefix = 'https://www.zhihu.com/people/'

    # NOTE(review): self.obj looks like a Selenium WebDriver (get /
    # find_elements_by_xpath / get_attribute) -- confirm. The
    # find_elements_by_xpath helper is gone in Selenium 4; migrating needs
    # a file-level import of By, so it is left as-is here.
    self.obj.get(response.url)
    anchors = self.obj.find_elements_by_xpath('//a[@class="UserLink-link"]')

    followees = []
    for anchor in anchors:
        try:
            href = anchor.get_attribute('href')
        except Exception:
            # Best effort: an element that cannot be read is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            continue
        if not href:
            # No usable link on this element; nothing to record.
            continue
        followees.append(href.replace(profile_prefix, ''))

    # De-duplicate while keeping first-seen order so the crawl order is
    # deterministic from run to run.
    followees = list(dict.fromkeys(followees))

    item = response.meta['item']
    item['relations_id'] += followees

    # NOTE(review): the button is looked up in the downloaded response, not
    # in the page loaded through self.obj -- confirm that is intended.
    next_button = response.xpath(
        '//button[@class="Button PaginationButton PaginationButton-next Button--plain"]'
    ).extract()
    if next_button:
        page = response.meta['page']
        # Drop the current "?page=N" suffix (if any) before adding the next.
        base_url = response.url.replace('?page=' + str(page), '')
        next_url = base_url + "?page=" + str(page + 1)
        yield Request(
            next_url,
            callback=self.relations,
            meta={'page': page + 1, 'item': item},
        )
    else:
        # Last page: the item is complete, hand it to the pipeline.
        yield item

    # NOTE(review): the original indentation was lost; this loop is assumed
    # to run for every page (not only the last one) -- confirm.
    for user in followees:
        yield Request(profile_prefix + user + '/answers', callback=self.parse)
评论列表
文章目录