def process_item(self, item, spider):
    """Drop *item* if an identical menu entry already exists in the database.

    Queries the database for entries matching every scraped field of the
    item.  If a match exists, the crawler stat ``items_already_in_db`` is
    incremented and the item is dropped; otherwise the item is passed on
    unchanged to the next pipeline stage.

    Args:
        item: scraped menu item (mapping with keys 'category', 'mensa',
            'description', 'date_valid', 'allergens', 'price').
        spider: the running spider; used for stats and logging.

    Returns:
        The unchanged item when it is not yet stored.

    Raises:
        DropItem: when the item is already present in the database.
    """
    db_matches = db.session.query(DBMenuEntry).filter_by(
        category=item['category'],
        mensa=item['mensa'],
        description=item['description'],
        date_valid=item['date_valid'],
        allergens=item['allergens'],
        price=item['price']
    ).all()

    # Guard clause: nothing stored yet -> let the item continue the pipeline.
    if not db_matches:
        return item

    # More than one match means a duplicate was accidentally saved earlier.
    # Log a warning instead of using `assert`: asserts are stripped under
    # `python -O`, and an AssertionError here would crash the whole pipeline
    # rather than just flag the inconsistency.
    if len(db_matches) > 1:
        spider.logger.warning(
            "Found %d duplicate database entries for item: %r",
            len(db_matches), item)

    spider.crawler.stats.inc_value('items_already_in_db')
    raise DropItem(
        "Menu item already found in database.\n"
        "Previously scraped on: {previous_scrape_time}".format(
            previous_scrape_time=str(db_matches[0].time_scraped)))