def parse_relation(self, response):
    """Handle one page of a user's relation list and schedule follow-ups.

    Reads the JSON body of ``response``, stores the ``url_token`` of every
    entry under ``data`` in ``response.meta['item']['relations_id']`` and
    sets ``response.meta['item']['relation_type']`` (prefixed with
    ``'next:'`` for every page whose offset is not 0).

    Yields, in order:
      * the updated item taken from ``response.meta['item']``;
      * one ``Request`` per collected ``url_token`` for that member's
        profile endpoint, handled by ``self.parse``;
      * when ``paging.is_end`` is false, one ``Request`` for the next page
        (offset advanced by 20), handled by this method again.

    Raises:
        ValueError: if the body is not valid JSON.
        KeyError: if an expected key (``data``, ``url_token``, ``paging``,
            ``is_end`` or a ``meta`` entry) is missing.
        IndexError: if ``response.url`` contains no ``offset=<digits>``
            while a next page is required.
    """
    # Function-scope import so this block stays self-contained.
    import json

    # Parse with a real JSON parser. The previous approach rewrote
    # 'true'/'false' and called eval() on the body, which executed text
    # received from the network, failed on JSON ``null`` and corrupted any
    # string value containing those words.
    payload = json.loads(str(response.body, encoding="utf8"))

    meta = response.meta
    item = meta['item']
    relation_type = meta['relation_type']

    relations_id = [entry['url_token'] for entry in payload['data']]
    item['relations_id'] = relations_id
    if meta['offset'] == 0:
        item['relation_type'] = relation_type
    else:
        item['relation_type'] = 'next:' + relation_type
    yield item

    # Queue a profile request for every member found on this page.
    for user_id in relations_id:
        yield Request(
            'https://www.zhihu.com/api/v4/members/' + user_id + '?include=locations,employments,industry_category,gender,educations,business,follower_count,following_count,description,badge[?(type=best_answerer)].topics',
            meta={'user_id': user_id},
            callback=self.parse,
        )

    if not payload['paging']['is_end']:
        offset = meta['offset'] + 20
        # Keep everything up to and including the last "offset=" that is
        # followed by digits, then append the new offset.
        next_page = re.findall(r'(.*offset=)\d+', response.url)[0]
        # NOTE(review): the same item object is handed to the next page and
        # will be mutated there after having been yielded above.
        yield Request(
            next_page + str(offset),
            callback=self.parse_relation,
            meta={
                'item': item,
                'offset': offset,
                'relation_type': relation_type,
            },
        )
评论列表
文章目录