import logging
import random

from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from twisted.internet import reactor

logger = logging.getLogger(__name__)


def crawl(args):
    # SETTINGS is the project's Scrapy settings object, defined elsewhere.
    spids = args.get('spiders') or []
    # Configure Scrapy's logging without the root handler, then
    # silence everything below WARNING from Scrapy itself.
    configure_logging(SETTINGS, install_root_handler=False)
    logging.getLogger('scrapy').setLevel(logging.WARNING)
    runner = CrawlerRunner(SETTINGS)
    loader = runner.spider_loader
    # 'all' expands to every spider registered with the loader.
    if 'all' in spids:
        spids = loader.list()
    # Keep only names the loader can actually resolve.
    known = set(loader.list())
    spiders = [loader.load(spid) for spid in spids if spid in known]
    if not spiders:
        return False
    # Shuffle so the crawl order varies between runs.
    random.shuffle(spiders)
    for spider in spiders:
        runner.crawl(spider)
    # Stop the reactor once every scheduled crawl has finished.
    d = runner.join()
    d.addBoth(lambda _: reactor.stop())
    logger.info('crawl reactor starting ...')
    reactor.run()
    logger.info('crawl reactor stopped')
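
For context, here is a minimal sketch of how `crawl` might be invoked; the shape of the `args` dict and the spider names are assumptions for illustration, not taken from the original.

# Hypothetical usage sketch: the spider names below are made up.
if __name__ == '__main__':
    crawl({'spiders': ['news_spider', 'blog_spider']})
    # To run every spider registered with the loader:
    # crawl({'spiders': ['all']})
    # Note: the Twisted reactor is not restartable, so crawl()
    # can only be called once per process.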