def parse_detail(self, response):
    """Build a ``CrawldetailsItem`` from a job-detail response and yield it.

    Every field is extracted independently: when one XPath matches nothing
    (or the extracted text does not have the expected shape) only that field
    is left unset and a diagnostic naming the field and the page URL is
    printed. The remaining fields -- in particular ``url`` -- are still
    filled in, so a partially parsed item can be traced back to its page.

    Args:
        response: the downloaded detail page; ``response.meta['kd']`` is
            read and copied into the item's ``kd`` field.

    Yields:
        The item, exactly once, even when some fields could not be
        extracted.
    """
    item = CrawldetailsItem()
    sel = Selector(response)

    def first(xpath):
        # First match of the XPath; raises IndexError when nothing matches.
        return sel.xpath(xpath).extract()[0]

    def second_token(xpath):
        # Strip the markup of the first matching node and keep the second
        # space-separated token of its UTF-8 encoded text.
        # NOTE(review): presumably the first token is a label -- confirm.
        text = BeautifulSoup(first(xpath)).get_text().encode("utf-8")
        return text.split(' ')[1].strip()

    # (field, extractor) pairs, kept in the order the fields were
    # originally populated.
    extractors = [
        ("kd", lambda: response.meta['kd']),
        ("title", lambda: self.get_text(
            sel, '//*[@id="job_detail"]/dt/h1/@title')),
        ("company", lambda: first(
            '//*[@id="container"]/div[2]/dl/dt/a/div/h2/text()').strip()),
        ("city", lambda: first(
            '//*[@id="job_detail"]/dd[1]/p[1]/span[2]/text()')),
        ("address", lambda: first(
            '//*[@id="container"]/div[2]/dl/dd/div[1]/text()')),
        ("industry", lambda: second_token(
            '//*[@id="container"]/div[2]/dl/dd/ul[1]/li[1]')),
        ("scale", lambda: second_token(
            '//*[@id="container"]/div[2]/dl/dd/ul[1]/li[2]')),
        ("phase", lambda: second_token(
            '//*[@id="container"]/div[2]/dl/dd/ul[2]/li')),
        ("salary", lambda: first(
            '//*[@id="job_detail"]/dd[1]/p[1]/span[1]/text()')),
        ("experience", lambda: first(
            '//*[@id="job_detail"]/dd[1]/p[1]/span[3]/text()')),
        ("education", lambda: first(
            '//*[@id="job_detail"]/dd[1]/p[1]/span[4]/text()')),
        ("description", lambda: self.get_text(
            sel, '//*[@id="job_detail"]/dd[2]')),
        ("url", lambda: response.url),
        # The last 8 characters of the text are dropped.
        # NOTE(review): presumably a fixed-length suffix -- confirm.
        ("published", lambda: first(
            '//*[@id="job_detail"]/dd[1]/p[3]/text()')[:-8]),
        ("tag", lambda: self.get_text(
            sel, '//*[@id="job_detail"]/dd[1]/p[2]/text()')),
    ]

    for field, extract in extractors:
        try:
            item[field] = extract()
        except Exception as error:  # "as" form is valid from Python 2.6 on
            # Best effort: report the failing field and keep going so one
            # missing node does not discard the rest of the item.
            print("parse_detail: could not extract %s from %s: %r"
                  % (field, response.url, error))

    yield item
评论列表
文章目录