Python spider_closed() example source code

exporter_json_lines.py (project: scrapy_project, author: zhanghe06)
def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_json_lines = self.files.pop(spider)
        file_json_lines.close()
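This fragment shows only the closing half of the pattern. As a point of reference, here is a minimal self-contained sketch of the whole pipeline, assuming a JSON-lines exporter; the class name and output filename are invented, but the signal wiring and the JsonLinesItemExporter calls follow the public Scrapy API.

from scrapy import signals
from scrapy.exporters import JsonLinesItemExporter


class ExampleJsonLinesPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # one output file per spider, keyed by the spider instance
        file_json_lines = open('%s_items.jl' % spider.name, 'wb')
        self.files[spider] = file_json_lines
        self.exporter = JsonLinesItemExporter(file_json_lines)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_json_lines = self.files.pop(spider)
        file_json_lines.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item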
stats.py (project: scrapy_project, author: zhanghe06)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)  # called when the spider is closed
        return pipeline
stats.py (project: scrapy_project, author: zhanghe06)
def spider_closed(self, spider, reason):
        """
        Called when the spider is closed.
        :param spider:
        :param reason: finished/cancelled/shutdown
        :return:
        """
        print(time.strftime("%Y-%m-%d %H:%M:%S"), 'StatsPipeline   Signals: spider_closed')
        print(spider.crawler.stats.get_stats())
        print(spider.crawler.stats.get_value('downloader/request_count', 0))  # requests sent
        print(spider.crawler.stats.get_value('downloader/response_count', 0))  # responses downloaded
        print(spider.crawler.stats.get_value('response_received_count', 0))  # responses received
        print(spider.crawler.stats.get_value('item_dropped_count', 0))  # items dropped
        print(spider.crawler.stats.get_value('item_scraped_count', 0))  # items scraped
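The same counters can also be read from a plain function connected to spider_closed outside of any pipeline. A minimal runnable sketch, assuming a trivial spider and a hypothetical target URL:

from scrapy import Spider, signals
from scrapy.crawler import CrawlerProcess


class DemoSpider(Spider):
    name = 'demo'
    start_urls = ['https://example.com']  # hypothetical target

    def parse(self, response):
        yield {'url': response.url}


def on_spider_closed(spider, reason):
    # reason is one of: finished / cancelled / shutdown
    stats = spider.crawler.stats
    print(reason, stats.get_value('item_scraped_count', 0))


process = CrawlerProcess(settings={'LOG_LEVEL': 'WARNING'})
crawler = process.create_crawler(DemoSpider)
crawler.signals.connect(on_spider_closed, signals.spider_closed)
process.crawl(crawler)
process.start()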
signals.py (project: scrapy_project, author: zhanghe06)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.engine_started, signals.engine_started)  # engine started
        crawler.signals.connect(pipeline.engine_stopped, signals.engine_stopped)  # engine stopped
        crawler.signals.connect(pipeline.item_scraped, signals.item_scraped)  # item scraped
        crawler.signals.connect(pipeline.item_dropped, signals.item_dropped)  # item dropped
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)  # spider opened
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)  # spider closed
        crawler.signals.connect(pipeline.spider_idle, signals.spider_idle)      # spider idle
        crawler.signals.connect(pipeline.spider_error, signals.spider_error)    # spider error
        crawler.signals.connect(pipeline.request_scheduled, signals.request_scheduled)    # request scheduled
        crawler.signals.connect(pipeline.request_dropped, signals.request_dropped)    # request dropped
        crawler.signals.connect(pipeline.response_received, signals.response_received)    # response received
        crawler.signals.connect(pipeline.response_downloaded, signals.response_downloaded)    # response downloaded
        return pipeline
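The snippet above only wires the handlers up. For reference, this sketch lists the argument list each connected signal delivers, per the Scrapy signal documentation; the class name is invented, the bodies are placeholders, and the methods would be connected exactly as shown above.

class SignalEchoPipeline(object):
    # engine lifecycle: no arguments besides self
    def engine_started(self):
        pass

    def engine_stopped(self):
        pass

    # item signals
    def item_scraped(self, item, response, spider):
        pass

    def item_dropped(self, item, response, exception, spider):
        pass

    # spider lifecycle
    def spider_opened(self, spider):
        pass

    def spider_closed(self, spider, reason):
        pass

    def spider_idle(self, spider):
        pass

    def spider_error(self, failure, response, spider):
        pass

    # request/response signals
    def request_scheduled(self, request, spider):
        pass

    def request_dropped(self, request, spider):
        pass

    def response_received(self, response, request, spider):
        pass

    def response_downloaded(self, response, request, spider):
        pass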
exporter_csv.py (project: scrapy_project, author: zhanghe06)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline
exporter_csv.py (project: scrapy_project, author: zhanghe06)
def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_csv = self.files.pop(spider)
        file_csv.close()
exporter_xml.py (project: scrapy_project, author: zhanghe06)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline
exporter_xml.py (project: scrapy_project, author: zhanghe06)
def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_xml = self.files.pop(spider)
        file_xml.close()
exporter_json.py (project: scrapy_project, author: zhanghe06)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline
full_domain_spider.py (project: malspider, author: ciscocsirt)
def __init__(self, *args, **kwargs):
        super(FullDomainSpider, self).__init__(*args, **kwargs)
        self.allowed_domains = kwargs.get('allowed_domains').split(',')
        self.org = kwargs.get('org')
        self.start_urls = kwargs.get('start_urls').split(',')
        # legacy pydispatch-style wiring; newer code uses crawler.signals.connect
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
full_domain_spider.py (project: malspider, author: ciscocsirt)
def spider_closed(self, spider):
        try:
            self.conn.close()
        except Exception:
            log.msg("Could not close database connection", level=log.ERROR)
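The scrapy.log module used above was removed in later Scrapy releases; a sketch of the same cleanup on a modern version would use the spider's built-in logger (self.conn is assumed to be the database connection opened elsewhere in this spider):

def spider_closed(self, spider):
        try:
            self.conn.close()  # self.conn: database connection opened at spider start
        except Exception:
            spider.logger.error("Could not close database connection")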
pipelines.py (project: scrapy_rss, author: woxcab)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline
pipelines.py (project: scrapy_rss, author: woxcab)
def spider_closed(self, spider):
        self.exporters[spider].finish_exporting()
        file = self.files.pop(spider)
        file.close()
test_exporter.py (project: scrapy_rss, author: woxcab)
def __exit__(self, exc_type, exc_val, exc_tb):
        # fire spider_closed by hand and re-raise any failure a handler returned
        responses = self.crawler.signals.send_catch_log(signal=signals.spider_closed,
                                                        spider=self.spider, reason=None)
        for _, failure in responses:
            if failure:
                failure.raiseException()
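send_catch_log returns a list of (receiver, response) pairs in which response is a twisted Failure when a handler raised. The matching __enter__ presumably fires spider_opened the same way; a hypothetical sketch of the complete context manager:

from scrapy import signals


class SpiderLifecycleContext(object):
    """Hypothetical counterpart to the __exit__ above: fires spider_opened on
    entry and spider_closed on exit, re-raising any handler failure."""

    def __init__(self, crawler, spider):
        self.crawler = crawler
        self.spider = spider

    def __enter__(self):
        self.crawler.signals.send_catch_log(signal=signals.spider_opened,
                                            spider=self.spider)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        responses = self.crawler.signals.send_catch_log(signal=signals.spider_closed,
                                                        spider=self.spider, reason=None)
        for _, failure in responses:
            if failure:
                failure.raiseException()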
pipelines.py (project: Spider, author: poluo)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline
pipelines.py (project: Spider, author: poluo)
def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()
pipelines.py (project: Spider, author: poluo)
def process_item(self, item, spider):
        # rotate the output file every 1000 items by replaying the
        # close/open handlers, then export the current item
        self.count += 1
        if self.count > 1000:
            self.count = 0
            self.file_count += 1
            self.spider_closed()
            self.spider_opened()
        self.exporter.export_item(item)
        return item
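spider_closed() and spider_opened() are invoked here without a spider argument, so the handlers in this class presumably take no parameters and track the current output file themselves. A hypothetical sketch of such a rotating pipeline, with a CSV exporter and invented file names:

from scrapy import signals
from scrapy.exporters import CsvItemExporter


class RotatingCsvPipeline(object):
    def __init__(self):
        self.count = 0
        self.file_count = 0
        self.file = None
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        # Scrapy passes a handler only the arguments it accepts, so these
        # zero-argument handlers can still be connected to spider signals
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self):
        # open the next numbered output chunk
        self.file = open('items_%04d.csv' % self.file_count, 'wb')
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.count += 1
        if self.count > 1000:
            # rotate: finish the current file and start a new one
            self.count = 0
            self.file_count += 1
            self.spider_closed()
            self.spider_opened()
        self.exporter.export_item(item)
        return item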
pipelines.py (project: Spider, author: poluo)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline
pipelines.py (project: Spider, author: poluo)
def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()
pipelines.py (project: Spider, author: poluo)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

