qichacha.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:lichking 作者: melonrun 项目源码 文件源码
def generate_firm_susong(self, response):
        """Handle one page of a firm's "susong" / "wenshu" listing.

        Stores the cleaned page body for the item, yields one POST request
        per case link found in the ``m_changeList`` table (handled by
        ``generate_firm_anjian``), and finally yields the request for listing
        page ``page_n``, with this method as its callback.

        Pagination ends when a response body shorter than 10 bytes arrives;
        in that case nothing is stored and nothing is yielded.

        Args:
            response: Response whose ``meta`` carries ``item`` (an object
                with an ``_id`` attribute), ``page_n`` (number of the page to
                request next) and ``chacha_url_pre`` (URL prefix the paging
                query string is appended to).

        Yields:
            ``scrapy.FormRequest`` objects for the case details, followed by
            a single ``scrapy.Request`` for the next listing page.
        """
        import logging

        # A (nearly) empty body is the stop condition for the pagination.
        if len(response.body) < 10:
            return

        qitem = response.meta["item"]
        page_n = response.meta["page_n"]
        url_prefix = response.meta["chacha_url_pre"]

        # NOTE(review): the "????" key looks like a mis-encoded non-ASCII
        # label. It is stored at runtime, so it is kept byte-for-byte here.
        self.append_susong_detail({"????": self.clean_content(response.body)}, qitem._id)

        anjian_list = response.xpath("//table[@class='m_changeList']//a[@class='c_a']/@onclick").extract()
        anjian_name = response.xpath("//table[@class='m_changeList']//tr//td[2]//a[@class='c_a']/text()").extract()

        # The two queries are independent, so their lengths may differ.
        # Indexing one list with the other's length raised IndexError and
        # thereby also dropped the pagination request below; pair them with
        # zip() instead and report the mismatch.
        if len(anjian_list) != len(anjian_name):
            logging.getLogger(__name__).warning(
                "case link/name count mismatch (%d links, %d names); "
                "unpaired entries are skipped",
                len(anjian_list), len(anjian_name),
            )

        for onclick, name in zip(anjian_list, anjian_name):
            yield scrapy.FormRequest(
                "http://www.qichacha.com/company_wenshuView",
                callback=self.generate_firm_anjian,
                cookies=self.qicha_cookie,
                method='POST',
                dont_filter=True,
                formdata={"id": self.generate_anjian_id(onclick)},
                meta={"item_id": qitem._id, "anjian_name": name}
            )

        # Request listing page `page_n` and hand `page_n + 1` to the callback
        # so that it asks for the following page in turn.
        yield scrapy.Request(
            url_prefix + '&tab=susong&box=wenshu&p=' + str(page_n),
            encoding='utf-8',
            callback=self.generate_firm_susong,
            cookies=self.qicha_cookie,
            meta={"item": qitem, "chacha_url_pre": url_prefix, "page_n": int(page_n) + 1}
        )
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号