def __init__(self, website):
    """Configure the spider from a ``website`` settings record.

    Args:
        website: Object exposing ``spider_name``, ``allow_domains``,
            ``start_urls``, ``rules_to_follow`` and ``rules_to_parse``.
            Every field except ``spider_name`` is a ``;``-separated
            string; surrounding whitespace and empty segments are
            ignored, and a ``None`` field is treated as empty.
    """

    def split_field(raw):
        # A plain ``raw.split(";")`` keeps empty segments, so "a;b;" would
        # yield a trailing "". Used as a link-extractor pattern, an empty
        # regex matches every URL and silently disables the filter; used
        # as a start URL or domain it is simply invalid.
        segments = (raw or "").split(";")
        return [seg.strip() for seg in segments if seg.strip()]

    self.name = website.spider_name
    self.redis_key = website.spider_name + ":start_urls"
    self.website = website
    self.allowed_domains = split_field(website.allow_domains)
    self.start_urls = split_field(website.start_urls)

    rules_to_follow = split_field(website.rules_to_follow)
    rules_to_parse = split_field(website.rules_to_parse)

    # The parse rule is listed first, exactly as before, so it is consulted
    # ahead of the follow-only rule.
    # NOTE(review): an empty pattern list presumably still leaves the
    # extractor unrestricted (same effect the old [""] pattern had) --
    # confirm against the LinkExtractor in use.
    self.rules = (
        Rule(
            LinkExtractor(allow=rules_to_parse),
            follow=True,
            callback='parse_detail',
        ),
        Rule(LinkExtractor(allow=rules_to_follow), follow=True),
    )

    # Kept last: presumably the base initializer reads ``self.rules`` and
    # ``self.name`` (as Scrapy's CrawlSpider does) -- verify against the
    # actual base class before reordering.
    super(ArticleSpider, self).__init__()
评论列表
文章目录