import redis
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor, task

# Rule, ProxySpider and the three maintainer classes are project-local
# and assumed to be imported at module level.

def run(self, args, opts):
    conn = redis.Redis(decode_responses=True)
    runner = CrawlerRunner(get_project_settings())
    try:
        rules = Rule.loads()
        if not rules:
            raise ValueError
    except ValueError:
        print('Error loading Redis rules, falling back to CSV rules')
        rules = Rule.loads('csv')
    for rule in rules:
        rule.save()
        if rule.name in self.excludes:
            continue
        if conn.hget('Rule:' + rule.name, 'status') == 'started':
            d = runner.crawl(ProxySpider, rule)
            # Mark the rule as finished once its crawler completes.
            # Bind rule.name as a default argument so each callback keeps
            # its own rule name instead of the loop variable's last value.
            d.addBoth(lambda _, name=rule.name: conn.hset(
                'Rule:' + name, 'status', 'finished'))
    rule_maintainer = RuleMaintainer(conn, runner)
    proxy_maintainer = ProxyMaintainer(conn)
    schedule_maintainer = ScheduleMaintainer(conn)
    # Periodic housekeeping tasks on the Twisted event loop,
    # each with its own interval (in seconds).
    task.LoopingCall(rule_maintainer).start(1)
    task.LoopingCall(proxy_maintainer).start(0.5)
    task.LoopingCall(schedule_maintainer).start(10)
    reactor.run()
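task.LoopingCall expects a plain callable, so each maintainer above is an instance of a class implementing __call__. The real maintainer logic lives in the project; the sketch below (with a hypothetical StatusLogger standing in for RuleMaintainer) only illustrates that callable-object pattern and the non-blocking constraint that comes with running on the reactor thread.

import redis
from twisted.internet import reactor, task

# Hypothetical stand-in for a maintainer: RuleMaintainer etc. are project
# classes; this only demonstrates the callable-object pattern that
# task.LoopingCall relies on.
class StatusLogger:
    def __init__(self, conn):
        self.conn = conn

    def __call__(self):
        # Invoked by LoopingCall at each interval, on the reactor thread;
        # keep the body fast and non-blocking or the event loop stalls.
        for key in self.conn.keys('Rule:*'):
            print(key, self.conn.hget(key, 'status'))

conn = redis.Redis(decode_responses=True)
task.LoopingCall(StatusLogger(conn)).start(5)
reactor.run()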