def process_item(self, item, spider):
    """Store a scraped item, or bump the counter of one already stored.

    Every field set on ``item`` must hold a truthy value; otherwise the
    item is dropped (note that this also rejects values such as ``0`` or
    ``''``). A valid item is then looked up in ``self.jobs_collection``
    by its ``url``:

    * already stored -- the stored document's ``times_seen`` is
      incremented, the document is written back, and the *stored*
      document is returned instead of the freshly scraped item;
    * not stored -- ``date_added`` and ``date_posted`` are filled in with
      ``datetime.now().isoformat()`` unless already set, ``times_seen``
      starts at 0, and the item is inserted.

    Args:
        item: The scraped item (presumably a Scrapy ``Item`` or a dict;
            iterating it must yield field names).
        spider: The spider that produced the item. Unused here.

    Returns:
        The stored document when the URL was seen before, else ``item``.

    Raises:
        DropItem: If any field set on ``item`` has a falsy value.
        KeyError: If ``item`` has no ``url`` field.
    """
    # Validate the field *values*. Iterating the item yields field names,
    # which are never empty, so testing the name itself could never fail.
    for field in item:
        if not item[field]:
            raise DropItem("Missing {0}!".format(field))

    # Query only after validation so dropped items cost no database lookup.
    stored = self.jobs_collection.find_one({'url': item['url']})

    if stored:
        # Seen before: continue with the stored document, not the new scrape.
        item = stored
        item['times_seen'] += 1
        # NOTE(review): if jobs_collection is a PyMongo collection, update()
        # and insert() no longer exist in PyMongo 4 -- consider replace_one()
        # / insert_one() once the driver version is confirmed.
        self.jobs_collection.update(
            {'_id': item['_id']}, dict(item), False)
    else:
        # First sighting: stamp the dates unless the spider already set them.
        now = datetime.now().isoformat()
        if not item.get('date_added', False):
            item['date_added'] = now
        if not item.get('date_posted', False):
            item['date_posted'] = now
        # Set after validation on purpose: 0 is falsy and would be rejected.
        item['times_seen'] = 0
        self.jobs_collection.insert(item)
    return item
# Comment list
# Table of contents