def analy_following_profile(self, html_text):
    """Extract user profile links from a page and queue the unseen ones.

    The markup is parsed and every ``href`` of an ``a.UserLink-link``
    nested inside ``h2.ContentItem-title`` /
    ``span.UserLink.UserItem-name`` is collected. Each href is turned
    into an ``http://www.zhihu.com`` URL, added to the
    ``red_had_spider`` set and, when that add reports success, pushed
    onto the ``red_to_spider`` list.

    Args:
        html_text: Markup of the page to scan, passed straight to
            ``html.fromstring``.

    Returns:
        None. The only effects are the calls made on ``red``.
    """
    # NOTE(review): `html` is presumably `lxml.html` and `red` a module-level
    # Redis client; neither is defined in this block -- confirm at file top.
    tree = html.fromstring(html_text)
    hrefs = tree.xpath(
        "//h2[@class='ContentItem-title']"
        "//span[@class='UserLink UserItem-name']"
        "//a[@class='UserLink-link']/@href"
    )
    for href in hrefs:
        # Prefix with the http origin directly instead of building an https
        # URL and substituting "https" -> "http" afterwards: a blanket
        # substitution would also rewrite any "https" that happens to occur
        # inside the href itself (e.g. in a user slug) and corrupt the link.
        target_url = "http://www.zhihu.com" + href
        # A truthy result from sadd is treated as "not seen before"
        # (presumably the count of newly added members), so each URL is
        # queued at most once.
        if red.sadd('red_had_spider', target_url):
            red.lpush('red_to_spider', target_url)
评论列表
文章目录