getProxies.py 文件源码

python
阅读 31 收藏 0 点赞 0 评论 0

项目:SpiderConfig 作者: brady-chen 项目源码 文件源码
def get_data(self):
        """Pick the first scraped proxy that answers a probe request.

        Calls ``self.get_url_and_html()`` and then walks ``self.list_html``
        (presumably filled by that call -- confirm against the rest of the
        class).  Each entry is re-parsed with BeautifulSoup; the second and
        third ``<td>`` cells of its first ``<tr>`` are joined as ``"ip:port"``
        and used as both the ``http`` and ``https`` proxy for a GET request to
        the probe URL with a 2 second timeout.

        The first candidate that answers with HTTP 200 is stored in
        ``self.ip_and_port``, its remaining cells are printed, and the search
        stops.  Every failure is reported with ``print`` and the next entry
        is tried.

        Relies on ``headers``, ``requests``, ``BeautifulSoup`` and
        ``traceback`` being available in the enclosing module.

        Returns:
            None.  The result is published through ``self.ip_and_port``,
            which this method leaves untouched when no candidate succeeds.
        """
        self.get_url_and_html()
        for html in self.list_html:
            try:
                soup = BeautifulSoup(str(html), 'lxml')
                # Look the row up once instead of re-searching the tree for
                # every cell.  A fragment with no <tr> or no <td> carries no
                # proxy data, so it is skipped rather than dereferencing None.
                row = soup.find("tr")
                cells = row.findAll("td") if row is not None else []
                if not cells:
                    continue

                ip_and_port = cells[1].get_text() + ":" + cells[2].get_text()
                proxies = {
                    "http": ip_and_port,
                    "https": ip_and_port
                }
                # Probe through the candidate proxy; give up after 2 seconds.
                response = requests.get(
                    "http://1212.ip138.com/ic.asp",
                    headers=headers,
                    proxies=proxies,
                    timeout=2
                )
                if response.status_code != 200:
                    print("http????200")
                    # Raised on purpose so that a non-200 answer is reported
                    # by the ConnectionError handler below as well.
                    raise requests.ConnectionError

                self.ip_and_port = ip_and_port
                # Single parenthesised arguments print identically on
                # Python 2 and also parse on Python 3.
                print("ip???????" + self.ip_and_port)
                # NOTE(review): the format string has six placeholders but
                # seven values are supplied, so cells[9] is evaluated and
                # then dropped by str.format -- confirm which label is
                # missing before changing the text.
                print("??????:{},?????:{},?????:{},?????:{},?????:{},?????:{}".format(
                    str(cells[3].get_text()).replace("\n", ""),
                    cells[4].get_text(),
                    cells[5].get_text(),
                    # The name filter is a set, so this matches the first
                    # <div> or <title> tag inside the cell.
                    cells[6].find({"div", "title"}).attrs["title"],
                    cells[7].find({"div", "title"}).attrs["title"],
                    cells[8].get_text(),
                    cells[9].get_text()
                ))
                break
            except requests.ReadTimeout:
                print("?ip??????????????ip")
            except requests.ConnectionError:
                print("?ip????")
            except Exception as e:
                # Catch-all so one malformed row cannot abort the whole scan;
                # the full traceback is printed for diagnosis.
                print("??????????:%(errorName)s\n?????:\n%(detailInfo)s" % {
                    "errorName": e, "detailInfo": traceback.format_exc()})
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号