def get_all_pic_url(question_num, answer_offset, answer_limit):
    """Collect image URLs from one page of answers to a Zhihu question.

    Requests the answers endpoint of the Zhihu v4 API for the given
    question and scans each answer's ``content`` field for
    ``data-original="https:....(jpg|png)"`` attributes.

    Relies on ``session``, ``headers`` and ``re`` being available at
    module level (they are not defined inside this function).

    Args:
        question_num: Question identifier substituted into the URL path.
        answer_offset: Value for the ``offset`` query parameter.
        answer_limit: Value for the ``limit`` query parameter.

    Returns:
        A list of ``https:`` image URLs, in the order they were found.

    Raises:
        KeyError: If the decoded response has no ``'data'`` entry, or an
            answer has no ``'content'`` entry.
        Exception: Whatever ``session.get`` or ``response.json()`` raise,
            e.g. when the body is not JSON (redirects are not followed).
    """
    url = (
        'https://www.zhihu.com/api/v4/questions/{qnum}/answers?include=data%5B*%5D.is_normal%2Cis_collapsed%2Cannotation_action%'
        '2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%'
        '2Cmark_infos%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cupvoted_followees%3Bdata%5B*%5D.author.follower_count%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics&'
        'offset={offset}&limit={limit}&sort_by=default'
    )
    response = session.get(
        url.format(qnum=question_num, offset=answer_offset, limit=answer_limit),
        headers=headers,
        allow_redirects=False,
    )
    print('json_response', response)
    answers = response.json()['data']

    # Group 1 is the URL without scheme and extension, group 2 the extension.
    pattern = re.compile(r'data-original=\"https\:(.*?)\.(jpg|png)"')
    urls = []
    for answer in answers:
        matches = pattern.findall(answer['content'])
        # Only every second match is kept, starting with the second one.
        # NOTE(review): presumably each image's data-original attribute
        # occurs twice in the answer HTML -- confirm against real responses.
        # The captured extension is reused so PNG images keep their ".png"
        # suffix instead of being rewritten to ".jpg".
        urls.extend(
            "https:" + path + "." + ext for path, ext in matches[1::2]
        )
    return urls
评论列表
文章目录