def parse(self, response):
    """Turn a JSON search response into job items.

    The response body is decoded as JSON and every entry found under
    ``hits -> hits`` is converted into one ``JobItem``.

    Args:
        response: Response object whose ``text`` attribute holds the
            JSON payload.

    Yields:
        JobItem: One item per hit with ``url``, ``title``, ``site`` and
        ``text`` populated. ``date_posted`` is only set when the
        publication date could be processed; failures are logged and
        the item is yielded without it.
    """
    payload = json.loads(response.text)
    to_text = html2text.HTML2Text()
    hits = payload['hits']['hits']

    for hit in hits:
        item = JobItem()
        source = hit['_source']

        item['site'] = 'WorkingNomads'
        item['title'] = source['title']
        item['url'] = urljoin("https://www.workingnomads.co/jobs/", source['slug'])

        # The description is run through the HTML-to-text converter and
        # then stored wrapped in a single-element list.
        item['text'] = to_text.handle(source['description'])
        # NOTE(review): 'tags' is never assigned on the item in this
        # method, so this suffix is presumably always empty -- confirm
        # whether the tags were meant to come from the source record.
        tag_suffix = ' '.join(item.get('tags', []))
        item['text'] = [item['text'] + tag_suffix]

        # Best effort: any problem with the date is logged and the item
        # is still emitted without 'date_posted'.
        try:
            # NOTE(review): the date goes through the same converter as
            # the description -- verify this is intentional.
            published = to_text.handle(source['pub_date'])
            # Keep only what precedes the first '+'.
            item['date_posted'] = published.partition('+')[0]
        except Exception as exc:
            self.logger.error(exc)

        yield item
评论列表
文章目录