Example source code for Python's item_scraped()

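The snippets collected below all follow the same pattern: a component implements from_crawler(), connects a handler to signals.item_scraped, and reacts each time an item finishes the item pipeline without being dropped. Here is a minimal, self-contained sketch of that pattern (the class and stat names are illustrative, not taken from any project below); such a component is typically enabled through the EXTENSIONS setting.

from scrapy import signals


class ItemCountExtension(object):
    """Illustrative extension that counts scraped items in the crawler stats."""

    def __init__(self, stats):
        self.stats = stats

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls(crawler.stats)
        # item_scraped fires after an item has passed all item pipeline stages
        crawler.signals.connect(ext.item_scraped, signal=signals.item_scraped)
        return ext

    def item_scraped(self, item, spider):
        self.stats.inc_value('custom/item_scraped_count', spider=spider)
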
extensions.py (project: scrapy_redis_spider, author: lymlhhj123)
def from_crawler(cls, crawler):
        instance = cls(crawler.stats)
        crawler.signals.connect(instance.item_dropped, 
                                signal=signals.item_dropped)
        crawler.signals.connect(instance.item_scraped, 
                                signal=signals.item_scraped)
        crawler.signals.connect(instance.response_received, 
                                signal=signals.response_received)
        crawler.signals.connect(instance.response_downloaded, 
                                signal=signals.response_downloaded)
        crawler.signals.connect(instance.item_saved,
                                signal=mysignals.item_saved)
        crawler.signals.connect(instance.item_saved_failed,
                                signal=mysignals.item_saved_failed)
        crawler.signals.connect(instance.html_saved,
                                signal=mysignals.html_saved)
        crawler.signals.connect(instance.html_saved_failed,
                                signal=mysignals.html_saved_failed)
        crawler.signals.connect(instance.timeouterror,
                                signal=mysignals.timeouterror)
        crawler.signals.connect(instance.dnslookuperror,
                                signal=mysignals.dnslookuperror)
        return instance
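The mysignals module used above is project-specific: in Scrapy, a custom signal is just a unique sentinel object that components send through the crawler's SignalManager. Below is a hedged sketch of how such signals might be defined and fired from a storage pipeline; the module, class, and method names are assumptions, not code from scrapy_redis_spider.

# mysignals.py (hypothetical): custom signals are plain sentinel objects
item_saved = object()
item_saved_failed = object()


# pipelines.py (hypothetical): fire the custom signals around a storage call
import mysignals


class SaveItemPipeline(object):

    def process_item(self, item, spider):
        try:
            self.store(item)
        except Exception:
            spider.crawler.signals.send_catch_log(
                signal=mysignals.item_saved_failed, item=item, spider=spider)
            raise
        spider.crawler.signals.send_catch_log(
            signal=mysignals.item_saved, item=item, spider=spider)
        return item

    def store(self, item):
        pass  # stand-in for the real database write
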
extension.py (project: tipi-engine, author: CIECODE-Madrid)
def __init__(self, crawler):
        self.crawler = crawler
        self.initiatives = 0
        self.amendments = 0
        self.finishtext = 0
        self.responses = 0
        self.members = 0
        # connect the extension object to signals
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
extension.py (project: tipi-engine, author: CIECODE-Madrid)
def item_scraped(self, item, spider):
        if isinstance(item, InitiativeItem):
            self.initiatives += 1
        elif isinstance(item, AmendmentItem):
            self.amendments += 1
        elif isinstance(item, FinishTextItem):
            self.finishtext += 1
        elif isinstance(item, ResponseItem):
            self.responses += 1
        elif isinstance(item, MemberItem):
            self.members += 1
spiders.py (project: sbdspider, author: onecer)
def setup_redis(self):
        """Setup redis connection and idle signal.

        This should be called after the spider has set its crawler object.
        """
        if not self.redis_key:
            self.redis_key = '%s:start_urls' % self.name

        self.server = connection.from_settings(self.crawler.settings)
        # idle signal is called when the spider has no requests left,
        # that's when we will schedule new requests from redis queue
        self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
        self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
        self.log("Reading URLs from redis list '%s'" % self.redis_key)
spiders.py (project: sbdspider, author: onecer)
def item_scraped(self, *args, **kwargs):
        """Avoids waiting for the spider to  idle before scheduling the next request"""
        self.schedule_next_request()
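schedule_next_request() itself is not part of the excerpt. In scrapy-redis style spiders it usually pops the next URL from the redis list and hands a request to the engine; a rough sketch under that assumption (not the project's actual implementation) follows. Note that it mirrors the older mixin style: newer Scrapy releases changed the engine.crawl() signature.

def schedule_next_request(self):
    """Pop one URL from the redis list and schedule it, if any is queued."""
    url = self.server.lpop(self.redis_key)
    if url:
        req = self.make_requests_from_url(url)
        self.crawler.engine.crawl(req, spider=self)
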
stats.py (project: BlogSpider, author: hack4code)
def from_crawler(cls, crawler):
        ext = cls(crawler.stats)
        crawler.signals.connect(ext.spider_opened,
                                signal=signals.spider_opened)
        crawler.signals.connect(ext.spider_closed,
                                signal=signals.spider_closed)
        crawler.signals.connect(ext.item_scraped,
                                signal=signals.item_scraped)

        return ext
stats.py (project: BlogSpider, author: hack4code)
def item_scraped(self, item, spider):
        pass
extensions.py (project: scrapy_redis_splash_spider, author: lymlhhj123)
def from_crawler(cls, crawler):
        instance = cls(crawler.stats)
        crawler.signals.connect(instance.item_dropped, 
                                signal=signals.item_dropped)
        crawler.signals.connect(instance.item_scraped, 
                                signal=signals.item_scraped)

        crawler.signals.connect(instance.response_received, 
                                signal=signals.response_received)
        crawler.signals.connect(instance.response_downloaded, 
                                signal=signals.response_downloaded)
        return instance
extensions.py (project: scrapy_redis_splash_spider, author: lymlhhj123)
def item_scraped(self, item, spider):
        # Called after an item has passed all item pipeline stages;
        # count it in the crawler stats.
        self.stats.inc_value('item/scraped', spider=spider)
spiders.py (project: scrappy, author: DormyMo)
def setup_redis(self):
        """Setup redis connection and idle signal.

        This should be called after the spider has set its crawler object.
        """
        if not self.redis_key:
            self.redis_key = '%s:start_urls' % self.name

        self.server = connection.from_settings(self.crawler.settings)
        # idle signal is called when the spider has no requests left,
        # that's when we will schedule new requests from redis queue
        self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
        self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
        self.log("Reading URLs from redis list '%s'" % self.redis_key)
spiders.py (project: scrappy, author: DormyMo)
def item_scraped(self, *args, **kwargs):
        """Avoids waiting for the spider to  idle before scheduling the next request"""
        self.schedule_next_request()
signals.py (project: scrapy_project, author: zhanghe06)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.engine_started, signals.engine_started)        # engine started
        crawler.signals.connect(pipeline.engine_stopped, signals.engine_stopped)        # engine stopped
        crawler.signals.connect(pipeline.item_scraped, signals.item_scraped)            # item scraped (passed all pipelines)
        crawler.signals.connect(pipeline.item_dropped, signals.item_dropped)            # item dropped by a pipeline
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)          # spider opened
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)          # spider closed
        crawler.signals.connect(pipeline.spider_idle, signals.spider_idle)              # spider idle
        crawler.signals.connect(pipeline.spider_error, signals.spider_error)            # spider error
        crawler.signals.connect(pipeline.request_scheduled, signals.request_scheduled)  # request scheduled
        crawler.signals.connect(pipeline.request_dropped, signals.request_dropped)      # request dropped by the scheduler
        crawler.signals.connect(pipeline.response_received, signals.response_received)  # response received
        crawler.signals.connect(pipeline.response_downloaded, signals.response_downloaded)  # response downloaded
        return pipeline
signals.py (project: scrapy_project, author: zhanghe06)
def item_scraped(self, item, response, spider):
        """
        ??????????
        :param item:
        :param response:
        :param spider:
        :return:
        """
        print time.strftime("%Y-%m-%d %H:%M:%S"), 'Pipeline   Signals: item_scraped'
        pass
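Whether written as a pipeline or an extension, a signal-logging component like the one above still has to be registered in the project settings. A minimal sketch follows, assuming a hypothetical class name and dotted path.

# settings.py (illustrative)
ITEM_PIPELINES = {
    'scrapy_project.signals.SignalLoggingPipeline': 300,  # assumed class name and module path
}
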
extensions.py (project: scrapy_redis_spider, author: lymlhhj123)
def item_scraped(self, item, spider):
        # Called after an item has passed all item pipeline stages;
        # count it in the crawler stats.
        self.stats.inc_value('item/scraped', spider=spider)

