spiders.py 文件源码

python
阅读 30 收藏 0 点赞 0 评论 0

项目:undercrawler 作者: TeamHG-Memex 项目源码 文件源码
def handle_form(self, url, form, meta):
        """Yield request kwargs for a not-yet-handled search form.

        The form's action is resolved against ``url`` and canonicalized.
        Nothing is yielded unless all of the following hold, checked in
        this order:

        * ``self.link_extractor`` matches the canonical action URL;
        * ``meta['form']`` equals ``'search'``;
        * the ``CRAZY_SEARCH_ENABLED`` setting is true;
        * the action is not already in ``self.handled_search_forms``;
        * fewer than ``MAX_DOMAIN_SEARCH_FORMS`` actions have been
          recorded in ``self.handled_search_forms``.

        When they do, the action is added to
        ``self.handled_search_forms`` and every kwargs dict produced by
        ``search_form_requests`` is yielded after its ``'meta'`` and
        ``'cls'`` entries have been set.
        """
        action_url = canonicalize_url(urljoin(url, form.action))

        # Guard clauses keep the same short-circuit order as a single
        # chained ``and`` condition would.
        if not self.link_extractor.matches(action_url):
            return
        if meta['form'] != 'search':
            return
        if not self.settings.getbool('CRAZY_SEARCH_ENABLED'):
            return
        if action_url in self.handled_search_forms:
            return
        handled_count = len(self.handled_search_forms)
        if handled_count >= self.settings.getint('MAX_DOMAIN_SEARCH_FORMS'):
            return

        self.logger.debug('Found a search form at %s', url)
        self.handled_search_forms.add(action_url)

        pending = search_form_requests(
            url, form, meta,
            search_terms=self.search_terms,
            extra_search_terms=self.extra_search_terms)
        for kwargs in pending:
            # Any 'meta' entry already present in kwargs is replaced.
            kwargs['meta'] = {'is_search': True}
            # Read use_splash per request, as this generator is lazy.
            if self.use_splash:
                kwargs['cls'] = SplashFormRequest
            else:
                kwargs['cls'] = FormRequest
            yield kwargs
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号