def fillProxyPool(self):
    """Scrape proxy listing pages until the pool holds ``self.size`` proxies.

    Each pass requests ``self.url`` with an ``&offset=N`` query suffix,
    parses the first ``<tbody>`` of the response and reads every ``<tr>``:
    the first ``<td>`` is taken as the proxy address and the second as its
    type (lower-cased, used as the list key).  Proxies accepted by
    ``self.checkValidity`` are pushed onto ``self.r`` under that key.

    Side effects:
        * advances the module-level ``offset`` by 50 per requested page;
        * pushes to the ``http`` / ``https`` lists held by ``self.r``
          (presumably a Redis client -- confirm against the constructor);
        * stores the current pool size in the class attribute ``llen``.

    The loop stops early when a page has no ``<tbody>`` or no rows, since
    there is nothing further to harvest from the listing.
    """
    global offset
    while self.llen < self.size:
        url = self.url + '&offset=' + str(offset)
        offset += 50
        ua = UserAgent()
        headers = {'User-Agent': ua.random}
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'lxml')

        # find() returns None when the tag is absent; previously this
        # crashed with AttributeError on the chained find_all().
        tbody = soup.find('tbody')
        if tbody is None:
            break
        rows = tbody.find_all('tr')
        if not rows:
            # An empty page means the listing is exhausted; without this
            # exit the loop would keep requesting ever larger offsets.
            break

        for row in rows:
            tds = row.find_all('td')
            if len(tds) < 2:
                # Not a proxy row (needs address and type cells).
                continue
            # split()/join strips every whitespace character, including
            # any embedded in the cell text.
            proxy = ''.join(tds[0].text.split())
            _type = ''.join(tds[1].text.split()).lower()
            validity = self.checkValidity(_type, proxy)
            # checkValidity is not visible here, so the original exact
            # comparison with True is kept rather than a truthiness test.
            if validity == True:
                self.r.lpush(_type, proxy)
                print('1 proxy added: %s. http: %d; https: %s.'
                      % (proxy, self.r.llen('http'), self.r.llen('https')))

        # Assign, do not accumulate: the two list lengths already are the
        # full pool size.  The former "+=" re-added earlier proxies on every
        # pass, so the loop could end before the pool was actually full.
        self.__class__.llen = self.r.llen('http') + self.r.llen('https')
评论列表
文章目录