def geturls(url):
    """Fetch ``url`` and add the http:// links found in it to ``urls``.

    ``urls`` is not a parameter: it is a collection defined outside this
    function, and it is mutated in place via ``append``.

    A link is appended only when it is not already in ``urls`` and its
    last three characters, lower-cased, are not one of ``gif``, ``jpg``,
    ``png`` or ``ico``.

    The exceptions named in the ``except`` clause are suppressed, so a
    page that cannot be fetched or parsed contributes no links (links
    appended before the failure stay in ``urls``).

    Returns the same ``urls`` object, whether or not the fetch succeeded.
    """
    skipped_suffixes = ("gif", "jpg", "png", "ico")
    try:
        # Single pre-formatted argument: the emitted text is the same
        # whether ``print`` is the Python 2 statement or a function.
        print("[+] Collecting: %s" % (url,))
        response = urllib2.urlopen(url)
        try:
            page = response.read()
        finally:
            # Release the connection explicitly instead of relying on
            # garbage collection to close the response object.
            response.close()
        # Raw string so the backslashes reach the regex engine untouched.
        # NOTE(review): the first '.' is unescaped and matches any
        # character; left as-is because escaping it would change which
        # hosts match.
        links = re.findall(r'http://\w+.\w+\.\w+[/\w+.]*[/.]\w+', page)
        for link in links:
            if link in urls:
                continue
            if link[-3:].lower() in skipped_suffixes:
                continue
            urls.append(link)
    except (IOError, TypeError, AttributeError,
            httplib.BadStatusLine, socket.error):
        # Best-effort: a failure on this page leaves ``urls`` as it was
        # at the point of failure.
        pass
    return urls
评论列表
文章目录