def get_data(self):
    self.get_url_and_html()
    for html in self.list_html:
        try:
            soup = BeautifulSoup(str(html), 'lxml')
            # Grab every <td> cell in this row; skip rows that have none
            tds = soup.find("tr").findAll("td")
            if tds:
                ip_and_port = tds[1].get_text() + ":" + tds[2].get_text()
                proxies = {
                    "http": ip_and_port,
                    "https": ip_and_port
                }
                # Verify the proxy by requesting the IP-lookup page through it,
                # with a 2-second timeout
                response = requests.get(
                    "http://1212.ip138.com/ic.asp",
                    headers=headers,  # request headers defined elsewhere in the script
                    proxies=proxies,
                    timeout=2
                )
                if response.status_code == 200:
                    self.ip_and_port = ip_and_port
                    print "Usable proxy found: " + self.ip_and_port
                    print "Location: {}, anonymity: {}, type: {}, speed: {}, connect time: {}, alive time: {}, verified at: {}".format(
                        tds[3].get_text().replace("\n", ""),
                        tds[4].get_text(),
                        tds[5].get_text(),
                        tds[6].find("div").attrs["title"],
                        tds[7].find("div").attrs["title"],
                        tds[8].get_text(),
                        tds[9].get_text()
                    )
                    break
                else:
                    print "HTTP status code was not 200"
                    raise requests.ConnectionError
        except requests.ReadTimeout:
            print "This proxy timed out, moving on to the next one"
        except requests.ConnectionError:
            print "This proxy could not be reached"
        except Exception as e:
            print "Unexpected error: %(errorName)s\nDetails:\n%(detailInfo)s" % {
                "errorName": e, "detailInfo": traceback.format_exc()}