def __init__(self, rule):
    """Configure this spider from a crawl-rule record.

    Reads the following attributes of ``rule``:

    * ``name`` -- used as the spider name.
    * ``allowed_domains`` -- comma-separated string of domains.
    * ``start_urls`` -- comma-separated string of start URLs.
    * ``next_page`` -- XPath restricting where pagination links are
      extracted from; may be empty or ``None``, in which case no
      pagination rule is added.
    * ``allow_url`` -- comma-separated string of patterns passed as
      ``allow`` to the link extractor whose links go to ``parse_item``.

    Comma-separated values are trimmed of surrounding whitespace and empty
    entries are discarded, so ``"a.com, b.com,"`` yields
    ``['a.com', 'b.com']``.
    """
    def _split_csv(value):
        # Split a comma-separated setting into clean entries. A bare
        # ``split(',')`` keeps stray spaces and produces '' for an empty
        # value or a trailing comma; '' is neither a usable domain/URL nor
        # a meaningful link pattern.
        parts = (value or '').split(',')
        return [part.strip() for part in parts if part.strip()]

    dispatcher.connect(self.spider_opened, signals.spider_opened)
    dispatcher.connect(self.spider_closed, signals.spider_closed)

    self.rule = rule
    self.name = rule.name
    self.allowed_domains = _split_csv(rule.allowed_domains)
    self.start_urls = _split_csv(rule.start_urls)

    rule_list = []
    # Add the rule that follows the "next page" link, when one is configured.
    # A truthiness test (rather than len()) also tolerates next_page=None.
    if rule.next_page:
        rule_list.append(
            Rule(LinkExtractor(restrict_xpaths=rule.next_page), follow=True))
    # Add the rule that extracts item links and hands them to parse_item.
    rule_list.append(
        Rule(LinkExtractor(allow=_split_csv(rule.allow_url), unique=True),
             follow=True,
             callback='parse_item'))
    self.rules = tuple(rule_list)

    # NOTE(review): keep this call last. The base class is presumably
    # Scrapy's CrawlSpider, whose constructor processes self.rules, so the
    # rules must already be assigned -- confirm against the class header.
    super(ProxySpiderSpider, self).__init__()
评论列表
文章目录