python类spider_opened()的实例源码

proxy_spider.py 文件源码 项目:ip_proxy_pool 作者: leeyis 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def __init__(self, rule):
        """Build a CrawlSpider dynamically from a database-backed *rule*.

        :param rule: record providing ``name``, ``allowed_domains``,
                     ``start_urls``, ``next_page`` and ``allow_url``
                     attributes (comma-separated strings where plural).
        """
        # Register lifecycle hooks on Scrapy's signal dispatcher.
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.rule = rule
        self.name = rule.name
        self.allowed_domains = rule.allowed_domains.split(',')
        self.start_urls = rule.start_urls.split(',')
        rule_list = []

        # Follow "next page" (pagination) links when the rule defines an
        # XPath for them.  Truthiness replaces len(): it behaves the same
        # for '' and additionally tolerates next_page being None.
        if rule.next_page:
            rule_list.append(Rule(LinkExtractor(restrict_xpaths=rule.next_page), follow=True))

        # Extract the detail-page links and hand them to parse_item.
        rule_list.append(Rule(LinkExtractor(
            allow=rule.allow_url.split(','),
            unique=True),
            follow=True,
            callback='parse_item'))

        self.rules = tuple(rule_list)
        super(ProxySpiderSpider, self).__init__()
proxy_spider.py 文件源码 项目:ip_proxy_pool 作者: leeyis 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Signal handler fired when the spider starts.

        Inserts a SpiderCrawlLog row marking this crawl as "Running...",
        unless an unfinished run (endTime IS NULL) is already recorded
        for the same spider id.
        """
        # Parenthesized print works on both Python 2 and 3.
        print("spider is running!")
        item = SpiderCrawlLog(
                              spiderID=self.rule.id,
                              spiderName=self.rule.name,
                              status="Running...",
                              startTime=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                              endTime=None,
                              pages=0,
                              items=0
                              )
        session = loadSession()
        # BUG FIX: the original joined the criteria with Python's `and`,
        # which throws away the first clause, and used `endTime is None`,
        # which is plain identity on the column object (always False) —
        # so the query never filtered on endTime.  Passing both clauses
        # to filter() ANDs them in SQL.  It also called len() on a Query,
        # which is not supported; count() runs the check in the database.
        open_runs = session.query(SpiderCrawlLog).filter(
            SpiderCrawlLog.spiderID == self.rule.id,
            SpiderCrawlLog.endTime.is_(None)).count()

        # Only record a new run if no unfinished run exists for this spider.
        if open_runs == 0:
            session.add(item)
            session.commit()
middlewares.py 文件源码 项目:ArticleSpider 作者: mtianyan 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Scrapy factory hook: build the middleware and register its
        spider_opened handler on the crawler's signal bus."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened,
                                signal=signals.spider_opened)
        return middleware
middlewares.py 文件源码 项目:ArticleSpider 作者: mtianyan 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Signal handler: log that the given spider has started."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
middlewares.py 文件源码 项目:myplanB 作者: JainulV 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Factory used by Scrapy to instantiate this component and wire
        its spider_opened callback to the matching signal."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened,
                                signal=signals.spider_opened)
        return instance
middlewares.py 文件源码 项目:myplanB 作者: JainulV 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Log the start of *spider* through its own logger."""
        spider.logger.info('Spider opened: %s' % spider.name)
        # NOTE: kept as eager %-formatting to match the project's style.
middlewares.py 文件源码 项目:lichking 作者: melonrun 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Scrapy entry point: create the component and subscribe it to
        the spider_opened signal."""
        component = cls()
        crawler.signals.connect(component.spider_opened,
                                signal=signals.spider_opened)
        return component
middlewares.py 文件源码 项目:lichking 作者: melonrun 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Emit an informational log line when the spider starts."""
        opened_msg = 'Spider opened: %s' % spider.name
        spider.logger.info(opened_msg)
middlewares.py 文件源码 项目:ScrapyTutorial 作者: boybeak 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Build this middleware for *crawler* (Scrapy factory hook) and
        attach the spider_opened signal handler."""
        obj = cls()
        crawler.signals.connect(obj.spider_opened, signal=signals.spider_opened)
        return obj
middlewares.py 文件源码 项目:ScrapyTutorial 作者: boybeak 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Record, via the spider's logger, that crawling has begun."""
        spider.logger.info('Spider opened: %s' % spider.name)
middlewares.py 文件源码 项目:sbdspider 作者: onecer 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Instantiate the component the way Scrapy expects and hook up
        its spider_opened listener."""
        created = cls()
        crawler.signals.connect(created.spider_opened,
                                signal=signals.spider_opened)
        return created
middlewares.py 文件源码 项目:sbdspider 作者: onecer 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Announce spider start-up in the log."""
        text = 'Spider opened: %s' % spider.name
        spider.logger.info(text)
middlewares.py 文件源码 项目:CustomsSpider 作者: orangZC 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Scrapy calls this to build the middleware; we also register
        the spider_opened signal handler here."""
        mw = cls()
        crawler.signals.connect(mw.spider_opened, signal=signals.spider_opened)
        return mw
middlewares.py 文件源码 项目:CustomsSpider 作者: orangZC 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Write a one-line start-up notice to the spider's logger."""
        spider.logger.info('Spider opened: %s' % spider.name)
middlewares.py 文件源码 项目:byrbbs-py3 作者: ryderchan 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Standard Scrapy factory: construct the component, subscribe
        spider_opened, and hand the instance back."""
        built = cls()
        crawler.signals.connect(built.spider_opened,
                                signal=signals.spider_opened)
        return built
middlewares.py 文件源码 项目:byrbbs-py3 作者: ryderchan 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Log that *spider* has been opened by the engine."""
        info = spider.logger.info
        info('Spider opened: %s' % spider.name)
middlewares.py 文件源码 项目:python-spider 作者: naginoasukara 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Factory hook used by Scrapy; wires the spider_opened signal
        before returning the new component."""
        new_component = cls()
        crawler.signals.connect(new_component.spider_opened,
                                signal=signals.spider_opened)
        return new_component
middlewares.py 文件源码 项目:python-spider 作者: naginoasukara 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Note the spider's start in its log output."""
        spider.logger.info('Spider opened: %s' % spider.name)
middlewares.py 文件源码 项目:job_scraper 作者: wlabatey 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Create the middleware for *crawler* and listen for the
        spider_opened signal (Scrapy's standard construction path)."""
        handler = cls()
        crawler.signals.connect(handler.spider_opened,
                                signal=signals.spider_opened)
        return handler
middlewares.py 文件源码 项目:job_scraper 作者: wlabatey 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Log an informational start-up message for *spider*."""
        banner = 'Spider opened: %s' % spider.name
        spider.logger.info(banner)
pipelines.py 文件源码 项目:job_scraper 作者: wlabatey 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Scrapy factory for this pipeline: builds the instance and
        subscribes both its open and close signal handlers."""
        pipe = cls()
        for callback, sig in ((pipe.spider_opened, signals.spider_opened),
                              (pipe.spider_closed, signals.spider_closed)):
            crawler.signals.connect(callback, sig)
        return pipe
pipelines.py 文件源码 项目:job_scraper 作者: wlabatey 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """On spider start, open the JSON output file and begin exporting.

        The file handle is stashed in ``self.files`` keyed by spider so
        the matching close handler can flush and close it later.
        """
        # Renamed from `file`, which shadows the Python builtin.
        out_file = open('/tmp/jobs.json', 'a+')
        self.files[spider] = out_file
        self.exporter = JsonItemExporter(out_file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()
middlewares.py 文件源码 项目:Acfun_article_spider 作者: bbbbx 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Scrapy's construction hook: return a new component with its
        spider_opened handler registered."""
        result = cls()
        crawler.signals.connect(result.spider_opened,
                                signal=signals.spider_opened)
        return result
middlewares.py 文件源码 项目:Acfun_article_spider 作者: bbbbx 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Report the spider's start through its logger."""
        spider.logger.info('Spider opened: %s' % spider.name)
middlewares.py 文件源码 项目:Tumblr_Feed_Video_Crawler 作者: VisitBoy 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Build this component for *crawler* and register its
        spider_opened callback."""
        inst = cls()
        crawler.signals.connect(inst.spider_opened, signal=signals.spider_opened)
        return inst
middlewares.py 文件源码 项目:Tumblr_Feed_Video_Crawler 作者: VisitBoy 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Log the spider's name at INFO level when it opens."""
        line = 'Spider opened: %s' % spider.name
        spider.logger.info(line)
middlewares.py 文件源码 项目:Python_Stock_Github 作者: DavidFnck 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Factory invoked by Scrapy; hooks spider_opened before
        returning the fresh instance."""
        fresh = cls()
        crawler.signals.connect(fresh.spider_opened,
                                signal=signals.spider_opened)
        return fresh
middlewares.py 文件源码 项目:Python_Stock_Github 作者: DavidFnck 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Signal callback: announce that the spider is now open."""
        spider.logger.info('Spider opened: %s' % spider.name)
middlewares.py 文件源码 项目:Spider 作者: Ctrlsman 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def from_crawler(cls, crawler):
        """Scrapy factory hook; creates the component and connects its
        spider_opened handler to the crawler's signals."""
        comp = cls()
        crawler.signals.connect(comp.spider_opened, signal=signals.spider_opened)
        return comp
middlewares.py 文件源码 项目:Spider 作者: Ctrlsman 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def spider_opened(self, spider):
        """Log the opening of *spider* using its own logger."""
        notice = 'Spider opened: %s' % spider.name
        spider.logger.info(notice)


问题


面经


文章

微信
公众号

扫码关注公众号