def parse(self, response):
    """Yield one POST ``FormRequest`` per topic category found in ``response``.

    Category names are read from the link text, and category ids from the
    ``data-id`` attribute, of every ``<li class="zm-topic-cat-item">``
    element. The two lists are paired positionally; pairing stops at the
    shorter of the two.

    Each request targets the ``TopicsPlazzaListV2`` node endpoint with
    ``offset`` 0, is handled by ``self.topic_parse`` and carries the
    category name in ``meta['topic_name']``.

    Raises:
        ValueError: while iterating, if a ``data-id`` value cannot be
            converted with ``int()``.
    """
    # Local import keeps this method self-contained; the file's import
    # block is not guaranteed to provide ``json``.
    import json

    item_xpath = '//li[@class="zm-topic-cat-item"]'
    topic_names = response.selector.xpath(item_xpath + '/a/text()').extract()
    topic_ids = response.selector.xpath(item_xpath + '/@data-id').extract()

    for topic_id, topic_name in zip(topic_ids, topic_names):
        params = {
            "topic_id": int(topic_id),
            "offset": 0,
            "hash_id": "d17ff3d503b2ebce086d2f3e98944d54",
        }
        yield FormRequest(
            url='https://www.zhihu.com/node/TopicsPlazzaListV2',
            method='POST',
            headers=self.set_headers('https://www.zhihu.com/topics'),
            # NOTE(review): the jar is constructed here but never loaded, so
            # it presumably holds no cookies at this point; Scrapy documents
            # ``cookies`` as a dict or list of dicts -- confirm this does
            # what is intended.
            cookies=cookielib.LWPCookieJar(filename='cookies'),
            formdata={
                'method': 'next',
                # Compact JSON (no spaces after separators), produced by a
                # real serializer instead of rewriting the dict repr.
                'params': json.dumps(params, separators=(',', ':')),
            },
            callback=self.topic_parse,
            meta={'topic_name': topic_name},
        )
评论列表
文章目录