def parse_job(self, response):
    """Parse a job link response into a ``JobItem``.

    Args:
        response: The response for a single job posting page. Its ``url``
            is read and its body is wrapped in a ``Selector``.

    Yields:
        JobItem: Exactly one item with ``url``, ``site``, ``title`` and
        ``text`` set. ``date_posted`` is set only when the posting-date
        heading is present and could be converted; otherwise the problem
        is logged and the item is yielded without that field.
    """
    s = Selector(response)
    item = JobItem()
    # Keep only the part of the URL before the query string.
    item['url'] = response.url.split('?')[0]
    item['site'] = 'CareerBuilder'
    item['title'] = s.css('h1::text').extract_first()
    # Text nodes from the facts block, the tags inside ``.item`` elements,
    # and the description, concatenated in that order.
    item['text'] = (
        s.css('.job-facts::text').extract()
        + s.css('.item').css('.tag::text').extract()
        + s.css('.description::text').extract()
    )

    posted = s.xpath('//h3[@id="job-begin-date"]/text()').extract_first()
    if posted is None:
        # extract_first() returns None when nothing matches; report that
        # directly instead of tripping an AttributeError on ``.replace``.
        self.logger.error(
            'No posting date found on %s', response.url)
    else:
        try:
            item['date_posted'] = utilities.naturaltime(
                posted.replace('Posted ', '')).isoformat()
        except Exception:
            # The date is best-effort: a conversion failure must not drop
            # the item. Log with traceback and context, then carry on.
            self.logger.exception(
                'Could not parse posting date %r on %s',
                posted, response.url)
    yield item
评论列表
文章目录