def get_xicidaili():
    """Scrape the xicidaili.com "nn" proxy listing and probe each HTTP proxy.

    Fetches listing page 1 (``range(1, 2)``), reads the 2nd, 3rd and 6th
    cells of every table row after the first (used here as IP, port and
    protocol), and for rows whose protocol is exactly ``"HTTP"`` sends a
    test request to http://www.ip138.com/ through that proxy. Each probed
    proxy is reported on stdout as ``"<ip>:<port> is valid"`` or
    ``"<ip>:<port> is not valid"``.

    Relies on ``requests``, ``Selector`` and ``headers`` being available at
    module level.

    Returns:
        None. Results are only printed.

    Raises:
        requests.RequestException: if a listing page cannot be fetched
            (including when the fetch times out).
    """
    url = "http://www.xicidaili.com/nn/%s"
    # One session for all listing pages; the context manager releases its
    # connection pool when the scan is finished.
    with requests.session() as s:
        for i in range(1, 2):
            page_url = url % str(i)
            print(page_url)
            # A timeout keeps a stalled listing server from hanging the run.
            req = s.get(page_url, headers=headers, timeout=10)
            selector = Selector(text=req.text)
            ip_nodes = selector.xpath("//table//tr")
            # Skip the first row (presumably the table header).
            for each in ip_nodes[1:]:
                ip_cells = each.xpath("./td[2]/text()").extract()
                port_cells = each.xpath("./td[3]/text()").extract()
                type_cells = each.xpath("./td[6]/text()").extract()
                # Rows missing any of the three cells cannot be probed;
                # skip them instead of aborting the scan with IndexError.
                if not (ip_cells and port_cells and type_cells):
                    continue
                ip, port, http_type = ip_cells[0], port_cells[0], type_cells[0]
                if http_type != "HTTP":
                    continue

                # Both schemes are routed through the same plain-HTTP proxy.
                proxy_url = "%s://%s:%s" % ("http", ip, port)
                proxies = {
                    "http": proxy_url,
                    "https": proxy_url,
                }
                try:
                    r = requests.get('http://www.ip138.com/', proxies=proxies, timeout=5)
                except Exception:
                    # Best-effort probe: any failure means the proxy is
                    # unusable. Unlike a bare ``except:``, this lets
                    # KeyboardInterrupt/SystemExit stop the scan.
                    print("%s:%s is not valid" % (ip, port))
                    continue

                if r.status_code == 200:
                    print("%s:%s is valid" % (ip, port))
                else:
                    # A non-200 answer through the proxy is also a failure;
                    # report it rather than staying silent.
                    print("%s:%s is not valid" % (ip, port))
评论列表
文章目录