crawlall.py source code

python

Project: ProxyPool · Author: Time1ess
# Imports required by this method; the project-local paths below
# (proxypool.*) are assumptions inferred from the names in the code.
import redis
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor, task

from proxypool.rules import Rule  # assumed module path
from proxypool.spiders import ProxySpider  # assumed module path
from proxypool.maintainers import (  # assumed module path
    ProxyMaintainer, RuleMaintainer, ScheduleMaintainer)


# Method of a custom Scrapy command class; self.excludes is a set of
# rule names the command skips.
def run(self, args, opts):
    conn = redis.Redis(decode_responses=True)
    runner = CrawlerRunner(get_project_settings())
    try:
        # Prefer rules stored in Redis; fall back to the CSV copy.
        rules = Rule.loads()
        if not rules:
            raise ValueError
    except ValueError:
        print('Error in loading Redis rules, fallback to CSV rules')
        rules = Rule.loads('csv')
    for rule in rules:
        rule.save()
        if rule.name in self.excludes:
            continue
        if conn.hget('Rule:' + rule.name, 'status') == 'started':
            d = runner.crawl(ProxySpider, rule)
            # Set status to finished once the crawler completes.
            # Bind rule.name via a default argument: a plain closure
            # would late-bind and every callback would update only
            # the last rule of the loop.
            d.addBoth(lambda _, name=rule.name: conn.hset(
                'Rule:' + name, 'status', 'finished'))
    # Periodic maintenance jobs driven by the Twisted reactor.
    rule_maintainer = RuleMaintainer(conn, runner)
    proxy_maintainer = ProxyMaintainer(conn)
    schedule_maintainer = ScheduleMaintainer(conn)
    task.LoopingCall(rule_maintainer).start(1)  # every second
    task.LoopingCall(proxy_maintainer).start(0.5)  # twice per second
    task.LoopingCall(schedule_maintainer).start(10)  # every 10 s
    reactor.run()
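
The default-argument binding in the addBoth callback above is load-bearing. A self-contained illustration of the late-binding pitfall it avoids:

# Closures capture variables, not values: every lambda in 'late'
# sees the loop variable's final value. Freezing it as a default
# argument captures the value at definition time instead.
late = [lambda: i for i in range(3)]
print([f() for f in late])  # [2, 2, 2]

bound = [lambda i=i: i for i in range(3)]
print([f() for f in bound])  # [0, 1, 2]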
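
For context, the command only launches a crawler for rules whose Redis hash carries status == 'started'. A minimal sketch of arming a rule by hand with redis-py; the key layout 'Rule:<name>' and the 'status' field mirror the hget/hset calls in run(), while the rule name 'xici' is purely hypothetical:

import redis

conn = redis.Redis(decode_responses=True)
# 'xici' is a made-up rule name; key and field names follow run().
conn.hset('Rule:xici', 'status', 'started')
print(conn.hget('Rule:xici', 'status'))  # -> 'started'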