def crawl(spider, *args, **kwargs):
    """Run a spider to completion in a blocking ``CrawlerProcess``.

    Args:
        spider (str): The Scrapy `name` of the spider.
        *args: Positional arguments forwarded to the spider's constructor.
        **kwargs: Keyword arguments forwarded to the spider's constructor.
            The key ``ignore_robots_txt`` (bool) is consumed here and NOT
            forwarded: when True, ``ROBOTSTXT_OBEY`` is disabled for this run.
    """
    settings = get_project_settings()
    # Pop the control flag so it does not leak into the spider's kwargs,
    # and use the public Settings API rather than mutating the private
    # ``settings.attributes`` dict (which raises AttributeError if the
    # key is missing).
    if kwargs.pop('ignore_robots_txt', None) is True:
        settings.set('ROBOTSTXT_OBEY', False)
    proc = CrawlerProcess(settings)
    try:
        proc.crawl(spider, *args, **kwargs)
        proc.start()
    except KeyError as err:
        # An unknown spider name raises KeyError from the spider loader.
        # Log a warning if the scraper name is invalid instead of
        # causing the job to fail.
        # NOTE: If there is any other type of error, the job will fail, and all
        # the jobs that depend on it will fail as well.
        logger.warning(err.args[0])
# 评论列表 (comment list) — stray page-scrape residue, commented out to avoid NameError
# 文章目录 (article table of contents) — stray page-scrape residue, commented out