task.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:BlogSpider 作者: hack4code 项目源码 文件源码
def test_spider(setting):
    """Do a one-shot blocking test crawl for a spider configuration.

    Builds a throwaway spider class from ``setting`` (tagged with a fresh
    UUID so its stats can be looked up afterwards), runs it under a
    dedicated CrawlerRunner with a minimal extension set, and checks the
    stats service to see whether the crawl produced anything.

    NOTE: this runs ``reactor.run()`` and therefore blocks the calling
    thread; since a Twisted reactor is not restartable, it is intended to
    be called at most once per process.

    Parameters:
        setting: dict-like spider configuration; copied, never mutated.

    Returns:
        bool: True if the test crawl recorded a positive item count,
        False if spider construction failed or nothing was crawled.
    """
    # Work on a copy so the caller's dict is not mutated.
    setting = setting.copy()
    spid = str(uuid.uuid4())
    setting['_id'] = spid
    try:
        cls = SpiderFactory.mkspider(setting)
    except SpiderFactoryException as e:
        logger.error('Error in test_spider SpiderFactory[%s]',
                     e)
        return False
    url = SETTINGS['TEMP_SPIDER_STATS_URL']
    TEST_SETTINGS = {
        # Keep only the stats extension; disable the default noisy ones.
        'EXTENSIONS': {'mydm.extensions.ExtensionStats': 900,
                       'scrapy.extensions.logstats.LogStats': None,
                       'scrapy.extensions.spiderstate.SpiderState': None,
                       'scrapy.extensions.telnet.TelnetConsole': None, },
        'SPIDER_STATS_URL': url,
        'BOT_NAME': 'TestSpider',
        'WEBSERVICE_ENABLED': False,
        'TELNETCONSOLE_ENABLED': False,
        'LOG_LEVEL': 'INFO',
        'LOG_FORMAT': '%(asctime)s-%(levelname)s: %(message)s',
        'LOG_DATEFORMAT': '%Y-%m-%d %H:%M:%S'
    }

    configure_logging(TEST_SETTINGS,
                      install_root_handler=False)
    # Scrapy's own logging is verbose; keep only warnings and above.
    logging.getLogger('scrapy').setLevel(logging.WARNING)
    runner = CrawlerRunner(TEST_SETTINGS)
    d = runner.crawl(cls)
    # Stop the reactor whether the crawl succeeds or fails.
    d.addBoth(lambda _: reactor.stop())
    logger.info('test_spider reactor starting ...')
    reactor.run()
    logger.info('test_spider reactor stopped')
    stats = get_stats(url,
                      [spid])
    # A missing entry means the crawl produced no stats: treat as failure
    # rather than raising KeyError.
    n = stats.get(spid, 0)
    return n > 0
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号