spider.py 文件源码-python代码片段

def parse(self, response):
        topic_xpath_rule = '//li[@class="zm-topic-cat-item"]/a/text()'
        topic_names = response.selector.xpath(topic_xpath_rule).extract()

        topic_xpath_rule = '//li[@class="zm-topic-cat-item"]/@data-id'
        topic_ids = response.selector.xpath(topic_xpath_rule).extract()

        # for i in range(len(topic_ids)):
        print("?30???")
        # for i in range(10):
        for i in range(len(topic_ids)):
            params = {"topic_id": int(topic_ids[i]), "offset": 0, "hash_id": "d17ff3d503b2ebce086d2f3e98944d54"}
            yield FormRequest(
                url='https://www.zhihu.com/node/TopicsPlazzaListV2',
                method='POST',
                # headers=self.set_headers2('https://www.zhihu.com/topics'),
                headers=self.set_headers('https://www.zhihu.com/topics'),
                cookies=cookielib.LWPCookieJar(filename='cookies'),
                # formdata={'method': 'next', 'params': '{"topic_id":988,"offset":0,"hash_id":"d17ff3d503b2ebce086d2f3e98944d54"}'},
                formdata={'method': 'next', 'params': str(params).replace("\'", "\"").replace(" ", "")},
                callback=self.topic_parse,
                meta={'topic_name': topic_names[i]}
            )