def getsamairdotru():
    """Harvest proxies from samair.ru and append them to the output file.

    Downloads http://www.samair.ru/proxy/proxy-01.htm through proxy-10.htm,
    passes each page through StripTags() (presumably removes the HTML
    markup -- defined elsewhere in this file), extracts every
    ``ip:port`` string and appends it, one per line, to the file named by
    the module-level ``output``.  Finally prints how many proxies were
    written.

    Any error raised while fetching or writing propagates to the caller;
    the output file is closed in every case.
    """
    maxpages = 10
    pxycnt = 0

    # Dots are escaped so that only a literal dotted quad followed by a port
    # matches; an unescaped '.' would accept any character between the
    # digit groups (e.g. a plain run of digits).
    proxy_pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}')

    # One opener is enough for all pages; the headers never change.
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    pfile = open(output, 'a')
    try:
        for page in range(1, maxpages + 1):
            # Page names are zero-padded to two digits: proxy-01 ... proxy-10.
            page_url = 'http://www.samair.ru/proxy/proxy-%02d.htm' % page
            response = opener.open(page_url)
            try:
                html = response.read()
            finally:
                response.close()

            proxies = proxy_pattern.findall(StripTags(html))
            pfile.writelines(proxy + "\n" for proxy in proxies)
            pxycnt += len(proxies)
    finally:
        # Close the file even when a download or write fails part-way.
        pfile.close()

    # Single-argument print() emits the same text as the former
    # `print pxycnt, "..."` statement (count, space, message).
    print("%d \t: Proxies received from : http://www.samair.ru/proxy/" % pxycnt)
评论列表
文章目录