def parse(self, response):
    """Schedule follow-up pages and extract proxy items from ``response``.

    This is a generator. It first advances ``self.current`` and yields a
    ``Request`` for each new page number -- at most 10 per call, stopping
    as soon as ``self.current`` reaches ``self.rule.max_page``. Then, if
    the response status is 200, it yields one loaded item per row matched
    by ``self.rule.row_xpath``, skipping the first match.

    Args:
        response: Object exposing ``status`` and ``xpath()``.

    Yields:
        ``Request`` objects for follow-up pages, followed by the items
        returned by ``ProxyItemLoader.load_item()``.
    """
    # The page counter lives on the instance, so each call continues from
    # where the previous call stopped instead of restarting at page one.
    for _ in range(10):
        self.current += 1
        if self.current >= self.rule.max_page:
            break
        yield Request(self.rule.url_fmt.format(self.current))

    # Pagination above is scheduled regardless of the status; only the row
    # extraction below is skipped for a non-200 response.
    if response.status != 200:
        # Bare ``return``: ``return None`` inside a generator is a syntax
        # error on Python 2 and adds nothing on Python 3.
        return

    # The first matched row is dropped -- presumably a table header row;
    # confirm against the rule's row XPath.
    rows = response.xpath(self.rule.row_xpath)[1:]
    for row in rows:
        loader = ProxyItemLoader(item=ProxyItem(), selector=row)
        # 'proxy' receives both the host and the port values; how they are
        # combined is decided by ProxyItemLoader, which is not visible here.
        loader.add_xpath('proxy', self.rule.host_xpath)
        loader.add_xpath('proxy', self.rule.port_xpath)
        loader.add_xpath('ip', self.rule.host_xpath)
        loader.add_xpath('port', self.rule.port_xpath)
        loader.add_xpath('addr', self.rule.addr_xpath)
        loader.add_xpath('mode', self.rule.mode_xpath)
        loader.add_xpath('protocol', self.rule.proto_xpath)
        loader.add_xpath('validation_time', self.rule.vt_xpath)
        # Record which rule produced this item.
        loader.add_value('src_rule', self.rule.name)
        yield loader.load_item()
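# 评论列表 (comment list -- page navigation residue, not part of the code)
# 文章目录 (table of contents -- page navigation residue, not part of the code)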