def generate_firm_content(self, response):
    """Parse a firm page, persist a ``YQichachaItem`` and request its base tab.

    The firm id is taken from the ``firm_<id>.html`` tail of ``response.url``;
    the name and summary text are read from the page via XPath. The populated
    item is saved and then passed on, through ``meta``, to
    ``self.generate_firm_base`` together with the URL prefix used for the
    follow-up ``company_getinfos`` requests.

    Args:
        response: The downloaded page; must expose ``url`` and ``xpath()``.

    Yields:
        A single ``scrapy.Request`` for the ``tab=base`` endpoint. Nothing is
        yielded (and nothing is saved) when the URL does not contain a firm id
        or the expected page elements are missing; a warning is logged instead.
    """
    # Function-scope import: the file's top-level import block is not visible
    # from this block, so the logger dependency is kept local.
    import logging

    logger = logging.getLogger(__name__)

    # Raw string so that ``\.`` reaches the regex engine as an escaped dot
    # instead of being an invalid string escape.
    id_match = re.search(r'firm_(.*)(\.html)$', response.url)
    if id_match is None:
        logger.warning(
            "Skipping %s: URL does not end in firm_<id>.html", response.url)
        return

    name_nodes = response.xpath(
        "//div[contains(@class, 'company-top-name')]/text()").extract()
    info_nodes = response.xpath(
        "//span[contains(@class, 'm_comInfo')]").extract()
    if not name_nodes or not info_nodes:
        # Guard against pages whose layout lacks the expected elements rather
        # than failing with a bare IndexError.
        logger.warning(
            "Skipping %s: company name or info element not found",
            response.url)
        return

    qitem = YQichachaItem()
    qitem._id = id_match.group(1)
    qitem.name = name_nodes[0]
    # NOTE(review): the "????" key looks like a label whose original characters
    # were lost to an encoding problem -- confirm the intended key.
    qitem.base_info = [{"????": self.clean_content(info_nodes[0])}]
    qitem.save()

    # Shared prefix for the per-tab detail requests; also handed to the next
    # callback through ``meta``.
    chacha_url_pre = (
        self.url_qichacha_pre
        + '/company_getinfos?unique=' + qitem._id
        + '&companyname=' + qitem.name
    )
    yield scrapy.Request(
        chacha_url_pre + '&tab=base',
        callback=self.generate_firm_base,
        cookies=self.qicha_cookie,
        encoding='utf-8',
        meta={"item": qitem, "chacha_url_pre": chacha_url_pre},
    )
评论列表
文章目录