def parse_page(self, response):
    """Extract proxies from the HTML table rows in ``response`` and register them.

    Every ``<tr>`` made of exactly eight adjacent ``<td>`` cells is treated
    as one proxy entry. Cell 0 is used as the IP, cell 1 as the port,
    cell 3 as the country and cell 4 as the anonymity level; the remaining
    cells are ignored. Each entry is wrapped in a ``Proxy`` tagged with
    ``self.name`` as its source and handed to ``self.add_proxy``.

    Args:
        response: Object exposing the page content as ``response.body``
            (text or bytes).

    Returns:
        None. Results are delivered through ``self.add_proxy``.
    """
    body = response.body
    if isinstance(body, bytes):
        # A str pattern cannot be matched against bytes on Python 3, so
        # decode first. Use the response's own encoding when it exposes
        # one (NOTE(review): assumes an ``encoding`` attribute such as
        # Scrapy's TextResponse provides -- confirm), else fall back to
        # UTF-8. Undecodable bytes are replaced rather than raising so a
        # single bad byte does not discard the whole page.
        encoding = getattr(response, "encoding", None) or "utf-8"
        body = body.decode(encoding, errors="replace")

    # Eight capture groups, one per table cell; re.S lets a cell span lines.
    row_pattern = re.compile(r"<tr>" + r"<td>(.*?)</td>" * 8 + r"</tr>", re.S)

    # findall always returns a list (possibly empty), so no None check is needed.
    for cells in row_pattern.findall(body):
        proxy = Proxy()
        proxy.set_value(
            ip=cells[0],
            port=cells[1],
            country=cells[3],
            anonymity=cells[4],
            source=self.name,
        )
        self.add_proxy(proxy)
评论列表
文章目录