def generate_firm_susong(self, response):
    """Process one page of a firm's "susong" listing and follow its links.

    Used as a Scrapy callback (it registers itself as the callback of the
    next-page request). For a response whose body has length >= 10 it:

    1. passes the cleaned body to ``self.append_susong_detail`` together
       with the item's ``_id``;
    2. yields one POST ``scrapy.FormRequest`` per ``onclick`` attribute
       found on ``a.c_a`` links inside the ``m_changeList`` table, to be
       handled by ``self.generate_firm_anjian``;
    3. yields a ``scrapy.Request`` for page ``page_n`` of the same listing,
       to be handled by this method again with ``page_n + 1`` in ``meta``.

    A response whose body is shorter than 10 yields nothing, which is what
    ends the pagination chain.

    Args:
        response: Response whose ``meta`` carries ``item`` (an object with
            an ``_id`` attribute), ``page_n`` (number of the next page to
            request) and ``chacha_url_pre`` (URL prefix of the listing).

    Yields:
        ``scrapy.FormRequest`` objects for the individual entries, followed
        by one ``scrapy.Request`` for the next listing page.
    """
    # A (nearly) empty body is treated as "no more pages": stop here.
    if len(response.body) < 10:
        return

    qitem = response.meta["item"]
    page_n = response.meta["page_n"]
    item_id = qitem._id

    # NOTE(review): the "????" key looks like a mis-encoded non-ASCII label.
    # It is runtime data, so it is preserved exactly as found.
    self.append_susong_detail({"????": self.clean_content(response.body)}, item_id)

    anjian_list = response.xpath(
        "//table[@class='m_changeList']//a[@class='c_a']/@onclick"
    ).extract()
    anjian_name = response.xpath(
        "//table[@class='m_changeList']//tr//td[2]//a[@class='c_a']/text()"
    ).extract()

    for index, onclick in enumerate(anjian_list):
        # The two lists come from different XPath queries and may differ in
        # length (e.g. a link without text). Fall back to an empty name
        # instead of raising IndexError, which would abort the generator and
        # lose the next-page request below.
        case_name = anjian_name[index] if index < len(anjian_name) else ""
        yield scrapy.FormRequest(
            "http://www.qichacha.com/company_wenshuView",
            callback=self.generate_firm_anjian,
            cookies=self.qicha_cookie,
            method='POST',
            # Every request targets the same URL, so duplicate filtering is
            # disabled; a real boolean is used rather than the string "true".
            dont_filter=True,
            formdata={"id": self.generate_anjian_id(onclick)},
            meta={"item_id": item_id, "anjian_name": case_name}
        )

    # Request the next page of the listing.
    url_prefix = response.meta["chacha_url_pre"]
    yield scrapy.Request(
        url_prefix + '&tab=susong&box=wenshu&p=' + str(page_n),
        encoding='utf-8',
        callback=self.generate_firm_susong,
        cookies=self.qicha_cookie,
        meta={"item": qitem, "chacha_url_pre": url_prefix, "page_n": int(page_n) + 1}
    )
评论列表
文章目录