Python engine_stopped() example source code
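
In Scrapy, the engine_stopped signal fires once the crawl engine has fully shut down. Components connect handlers to it to release resources such as database connections, browser instances, or periodic tasks. The snippets below are collected from open-source projects; as a primer, here is a minimal sketch of a hypothetical extension (the class and handler names are illustrative, not taken from any of the projects below):

from scrapy import signals

class CleanupExtension(object):

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls()
        # Scrapy will call ext.engine_stopped() after the engine stops
        crawler.signals.connect(ext.engine_stopped,
                                signal=signals.engine_stopped)
        return ext

    def engine_stopped(self):
        # release connections, flush buffers, stop background tasks, etc.
        pass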

middlewares.py (project: scrapy-rotating-proxies, author: TeamHG-Memex)
def from_crawler(cls, crawler):
        s = crawler.settings
        proxy_path = s.get('ROTATING_PROXY_LIST_PATH', None)
        if proxy_path is not None:
            with codecs.open(proxy_path, 'r', encoding='utf8') as f:
                proxy_list = [line.strip() for line in f if line.strip()]
        else:
            proxy_list = s.getlist('ROTATING_PROXY_LIST')
        if not proxy_list:
            raise NotConfigured()
        mw = cls(
            proxy_list=proxy_list,
            logstats_interval=s.getfloat('ROTATING_PROXY_LOGSTATS_INTERVAL', 30),
            stop_if_no_proxies=s.getbool('ROTATING_PROXY_CLOSE_SPIDER', False),
            max_proxies_to_try=s.getint('ROTATING_PROXY_PAGE_RETRY_TIMES', 5),
            backoff_base=s.getfloat('ROTATING_PROXY_BACKOFF_BASE', 300),
            backoff_cap=s.getfloat('ROTATING_PROXY_BACKOFF_CAP', 3600)
        )
        crawler.signals.connect(mw.engine_started,
                                signal=signals.engine_started)
        crawler.signals.connect(mw.engine_stopped,
                                signal=signals.engine_stopped)
        return mw
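For context: this middleware raises NotConfigured unless a proxy list is provided. A minimal settings sketch, following the middleware paths documented in the scrapy-rotating-proxies README (the proxy addresses are placeholders):

# settings.py
DOWNLOADER_MIDDLEWARES = {
    'rotating_proxies.middlewares.RotatingProxyMiddleware': 610,
    'rotating_proxies.middlewares.BanDetectionMiddleware': 620,
}
ROTATING_PROXY_LIST = [
    'proxy1.example.com:8000',  # placeholder proxies
    'proxy2.example.com:8031',
]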
pipelines.py (project: scrapy_site, author: hl10502)
def __init__(self):
        dispatcher.connect(self.spider_opended, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
        dispatcher.connect(self.engine_started, signals.engine_started)

        # current working directory, i.e. the directory containing scrapy_site
        self.curpath = os.getcwd()
        # directory where per-spider message files are stored
        self.spidername_filepath = self.curpath + "/scrapy_site/msg/"

        # load the keyword dictionary from keyword.conf
        self.keywordsDict = dict()
        self.getKeywords()

        # load the website-name dictionary
        self.webnamesDict = dict()
        self.getWebnames()

        # cache of scraped messages
        self.msgDict = dict()

        SavePipeline.initCount = SavePipeline.initCount + 1
pipelines.py (project: housebot, author: jbkopecky)
def __init__(self):
        self.conn = None
        dispatcher.connect(self.initialize, signals.engine_started)
        dispatcher.connect(self.finalize, signals.engine_stopped)
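The initialize and finalize handlers themselves are not shown in this snippet; a plausible sketch, assuming the pipeline holds a sqlite3 connection (the database path and commit strategy are assumptions, not housebot's actual code):

import sqlite3

def initialize(self):
        # engine_started: open the database connection
        self.conn = sqlite3.connect('housebot.db')  # hypothetical path

def finalize(self):
        # engine_stopped: commit pending writes and close the connection
        if self.conn is not None:
            self.conn.commit()
            self.conn.close()
            self.conn = None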
webservice.py (project: scrappy, author: DormyMo)
def __init__(self, crawler):
        if not crawler.settings.getbool('JSONRPC_ENABLED'):
            raise NotConfigured
        self.crawler = crawler
        logfile = crawler.settings['JSONRPC_LOGFILE']
        self.portrange = [int(x) for x in crawler.settings.getlist('JSONRPC_PORT', [6023, 6073])]
        self.host = crawler.settings.get('JSONRPC_HOST', '127.0.0.1')
        root = RootResource(crawler)
        root.putChild('crawler', CrawlerResource(self.crawler))
        # root.putChild('spidercls', CrawlerResource(self.crawler.__dict__['spidercls']))
        server.Site.__init__(self, root, logPath=logfile)
        self.noisy = False
        crawler.signals.connect(self.start_listening, signals.engine_started)
        crawler.signals.connect(self.stop_listening, signals.engine_stopped)
pipelines.py (project: Android-Repackaged-App-Detection-System, author: M157q)
def __init__(self):
        self.filename += settings.MARKET_NAME
        self.filename += ".db"
        self.filename = path.join(settings.DATABASE_DIR, self.filename)
        print(self.filename)
        self.conn = None
        dispatcher.connect(self.initialize, signals.engine_started)
        dispatcher.connect(self.finalize, signals.engine_stopped)
signals.py (project: scrapy_project, author: zhanghe06)
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.engine_started, signals.engine_started)  # engine started
        crawler.signals.connect(pipeline.engine_stopped, signals.engine_stopped)  # engine stopped
        crawler.signals.connect(pipeline.item_scraped, signals.item_scraped)  # item scraped successfully
        crawler.signals.connect(pipeline.item_dropped, signals.item_dropped)  # item dropped
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)  # spider opened
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)  # spider closed
        crawler.signals.connect(pipeline.spider_idle, signals.spider_idle)      # spider idle
        crawler.signals.connect(pipeline.spider_error, signals.spider_error)    # spider error
        crawler.signals.connect(pipeline.request_scheduled, signals.request_scheduled)    # request scheduled
        crawler.signals.connect(pipeline.request_dropped, signals.request_dropped)    # request dropped
        crawler.signals.connect(pipeline.response_received, signals.response_received)    # response received
        crawler.signals.connect(pipeline.response_downloaded, signals.response_downloaded)    # response downloaded
        return pipeline
signals.py (project: scrapy_project, author: zhanghe06)
def engine_stopped(self):
        """
        ????
        :return:
        """
        print time.strftime("%Y-%m-%d %H:%M:%S"), 'Pipeline   Signals: engine_stopped'
        pass
run.py (project: decoration-design-crawler, author: imflyn)
def __init__(self):
        self.is_running = False
        dispatcher.connect(self.pause_crawler, signals.engine_stopped)
        self.setting = get_project_settings()
        self.process = None
keyword_ranking_spider.py (project: amazon_spider, author: rangerdong)
def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.items = {}
        self.found = {}
        dispatcher.connect(self.init_scrapy, signals.engine_started)
        dispatcher.connect(self.close_scrapy, signals.engine_stopped)
detail_spider.py (project: amazon_spider, author: rangerdong)
def __init__(self, asin, daily=0, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.asin = asin
        self.last_review = 0
        self.profile_update_self = False    # whether this spider itself triggers the profile update
        self.updated = False   # whether the profile has been updated
        self.daily = True if int(daily) == 1 else False  # whether this run is the daily crawl
        self.start_urls = [
            'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=three_star' % self.asin,
            'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=two_star' % self.asin,
            'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=one_star' % self.asin
        ]
        dispatcher.connect(self.update_profile_self, signals.engine_stopped)
        dispatcher.connect(self.init_profile, signals.engine_started)
manager.py (project: malspider, author: ciscocsirt)
def webdriver(self):
        """Return the webdriver instance, instantiate it if necessary."""
        if self._webdriver is None:
            short_arg_classes = (webdriver.Firefox, webdriver.Ie)
            if issubclass(self._browser, short_arg_classes):
                cap_attr = 'capabilities'
            else:
                cap_attr = 'desired_capabilities'
            options = self._options
            options[cap_attr] = self._desired_capabilities
            self._webdriver = self._browser(**options)
            self._webdriver.set_window_size(settings.DRIVER_WINDOW_WIDTH, settings.DRIVER_WINDOW_HEIGHT)
            self._webdriver.set_page_load_timeout(self.crawler.settings.get('DOMAIN_TIMEOUT', 30))
            self.crawler.signals.connect(self._cleanup, signal=engine_stopped)
        return self._webdriver
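The _cleanup handler connected above is not shown in this snippet; a minimal sketch of what such a handler typically does (the body is an assumption, not malspider's actual code):

def _cleanup(self):
        # engine_stopped: quit the browser and drop the cached instance
        if self._webdriver is not None:
            self._webdriver.quit()
            self._webdriver = None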
keyword_ranking_spider.py (project: amazon-scrapy, author: dynamohuang)
def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.items = {}
        self.found = {}
        self.keyword_pool = {}
        self.store_poll = {}
        self.store_date = {}
        dispatcher.connect(self.init_scrapy, signals.engine_started)
        dispatcher.connect(self.close_scrapy, signals.engine_stopped)
review_detail_spider.py (project: amazon-scrapy, author: dynamohuang)
def __init__(self, asin, daily=0, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.asin = asin
        self.last_review = 0
        self.profile_update_self = False    # whether this spider itself triggers the profile update
        self.updated = False   # whether the profile has been updated
        self.daily = True if int(daily) == 1 else False  # whether this run is the daily crawl
        self.start_urls = [
            'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=three_star' % self.asin,
            'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=two_star' % self.asin,
            'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=one_star' % self.asin
        ]
        dispatcher.connect(self.update_profile_self, signals.engine_stopped)
        dispatcher.connect(self.init_profile, signals.engine_started)
__init__.py (project: scrapy-qtwebkit, author: ArturGaspar)
def __init__(self, signal_manager, app):
        super(_QApplicationStopper, self).__init__()
        self._qapplication = weakref.ref(app)
        self.signals = signal_manager
        self.signals.connect(self, signal=signals.engine_stopped, weak=False)
__init__.py (project: scrapy-qtwebkit, author: ArturGaspar)
def __call__(self):
        self.signals.disconnect(self, signals.engine_stopped)
        app = self._qapplication()
        if app is not None:
            app.quit()
__init__.py (project: scrapy-qtwebkit, author: ArturGaspar)
def engine_stopped():
        if QApplication.instance():
            QApplication.instance().quit()
middlewares.py (project: scrapy-rotating-proxies, author: TeamHG-Memex)
def engine_stopped(self):
        if self.log_task.running:
            self.log_task.stop()
        if self.reanimate_task.running:
            self.reanimate_task.stop()
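For symmetry, the engine_started counterpart would start the two periodic tasks this handler stops. A sketch assuming Twisted's task.LoopingCall, with log_stats, reanimate_proxies and reanimate_interval as assumed names:

from twisted.internet import task

def engine_started(self):
        # log proxy statistics every logstats_interval seconds
        self.log_task = task.LoopingCall(self.log_stats)
        self.log_task.start(self.logstats_interval, now=True)
        # periodically return dead proxies to the rotation after backoff
        self.reanimate_task = task.LoopingCall(self.reanimate_proxies)
        self.reanimate_task.start(self.reanimate_interval, now=False)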
pipelines.py (project: scrapy_site, author: hl10502)
def engine_stopped(self):
        print('Pipeline initialization count ======== %s' % SavePipeline.initCount)

