def parse_items(self, response):
    """Build one ``Job`` item per search-result block found in *response*.

    Every ``div`` whose class contains ``searchResultTitle`` is treated as
    one job posting. Each scraped field falls back to ``"n/a"`` when its
    node is absent, so a single incomplete posting no longer raises
    ``IndexError`` and discards the rest of the page.

    Args:
        response: The response object handed to ``Selector``.

    Returns:
        A list of populated ``Job`` items, in document order.
    """
    missing = "n/a"

    def first_match(node, query, strip=True):
        # ``extract()`` yields a list that is empty when nothing matched,
        # so guard before indexing instead of assuming a hit.
        found = node.xpath(query).extract()
        if not found:
            return missing
        return found[0].strip() if strip else found[0]

    hxs = Selector(response)
    items = []
    for job in hxs.xpath('//div[contains(@class, "searchResultTitle")]'):
        item = Job()
        item["title"] = first_match(
            job, './/h2/a[contains(@id, "TITLE")]/text()')
        item["company"] = first_match(
            job, './/p/span[contains(@id, "CONTACT_OFFICE")]/text()')
        item["location"] = first_match(
            job, './/p/span[contains(@id, "FREE_LOCATION")]/text()')
        # The href is stored exactly as scraped (no stripping), as before.
        item["url"] = first_match(
            job, './/h2/a[contains(@id, "TITLE")]/@href', strip=False)
        item["date_posted"] = first_match(
            job, './/p/span[contains(@id, "POSTED_DATE")]/text()')
        item["salary"] = first_match(
            job, './/p/span[contains(@id, "SALARY")]/text()')
        item["crawl_timestamp"] = datetime.now().strftime("%H:%M:%S %Y-%m-%d")
        item["job_board"] = "dice"
        items.append(item)
    return items
评论列表
文章目录