spiders.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:undercrawler 作者: TeamHG-Memex 项目源码 文件源码
def make_request(
        self, url, callback=None, meta=None, cls=None, **kwargs):
    """Build a request for ``url``, adding Splash arguments when enabled.

    Args:
        url: Target URL, passed to ``cls`` as the first positional argument.
        callback: Response callback; falls back to ``self.parse`` when falsy.
        meta: Optional mapping of request metadata. It is copied, so the
            caller's object is never modified.
        cls: Request class (or any callable) used to build the request.
            Defaults to ``SplashRequest`` when ``self.use_splash`` is true
            and to ``Request`` otherwise.
        **kwargs: Extra keyword arguments forwarded to ``cls``. When
            ``self.use_splash`` is true, the ``args``, ``endpoint`` and
            ``cache_args`` keys are overwritten with the values built here.

    Returns:
        Whatever ``cls(url, callback=..., meta=..., **kwargs)`` returns; the
        ``meta`` it receives always has ``'avoid_dup_content'`` set to True.
    """
    callback = callback or self.parse
    cls = cls or (SplashRequest if self.use_splash else Request)
    if self.use_splash:
        settings = self.settings
        splash_args = {
            'lua_source': self.lua_source,
            'js_source': self.js_source,
            'run_hh': settings.getbool('RUN_HH'),
            'return_png': settings.getbool('SCREENSHOT'),
            'images_enabled': settings.getbool('IMAGES_ENABLED'),
        }
        # Size settings are optional: forward them (lower-cased, as ints)
        # only when they are set to a truthy value.
        for name in ('VIEWPORT_WIDTH', 'VIEWPORT_HEIGHT',
                     'SCREENSHOT_WIDTH', 'SCREENSHOT_HEIGHT'):
            if settings.get(name):
                splash_args[name.lower()] = settings.getint(name)
        if settings.getbool('ADBLOCK'):
            splash_args['filters'] = 'fanboy-annoyance,easylist'
        if settings.getbool('FORCE_TOR'):
            splash_args['proxy'] = 'tor'
        kwargs.update(
            args=splash_args,
            endpoint='execute',
            cache_args=['lua_source', 'js_source'],
        )
    # Copy before adding our flag: the caller may reuse the same ``meta``
    # dict for several requests and must not see it change underneath them.
    meta = dict(meta or {})
    meta['avoid_dup_content'] = True
    return cls(url, callback=callback, meta=meta, **kwargs)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号