def process_item(self, item, spider):
    """Drop *item* if an identical menu entry already exists in the database.

    Queries the database for entries matching every scraped field of the
    item.  If a match exists, the crawler stat ``items_already_in_db`` is
    incremented and the item is dropped; otherwise the item is passed on
    unchanged to the next pipeline stage.

    Args:
        item: scraped menu item (mapping with keys 'category', 'mensa',
            'description', 'date_valid', 'allergens', 'price').
        spider: the running spider; used for stats and logging.

    Returns:
        The unchanged item when it is not yet stored.

    Raises:
        DropItem: when the item is already present in the database.
    """
    db_matches = db.session.query(DBMenuEntry).filter_by(
        category=item['category'],
        mensa=item['mensa'],
        description=item['description'],
        date_valid=item['date_valid'],
        allergens=item['allergens'],
        price=item['price']
    ).all()

    # Guard clause: nothing stored yet -> let the item continue the pipeline.
    if not db_matches:
        return item

    # More than one match means a duplicate was accidentally saved earlier.
    # Log a warning instead of using `assert`: asserts are stripped under
    # `python -O`, and an AssertionError here would crash the whole pipeline
    # rather than just flag the inconsistency.
    if len(db_matches) > 1:
        spider.logger.warning(
            "Found %d duplicate database entries for item: %r",
            len(db_matches), item)

    spider.crawler.stats.inc_value('items_already_in_db')
    raise DropItem(
        "Menu item already found in database.\n"
        "Previously scraped on: {previous_scrape_time}".format(
            previous_scrape_time=str(db_matches[0].time_scraped)))