eventcalendar_scanner.py 文件源码-python代码片段

eventcalendar_scanner.py 文件源码

python

阅读 37 收藏 0 点赞 0 评论 0

项目：darkc0de-old-stuff 作者: tuwid 项目源码文件源码

def geturls(query,num):
    """Scrape Google result pages for ``query`` and collect 'www' host strings.

    Result pages are requested at ``start`` offsets 10, 20, 30, ... for as
    long as the offset is below ``int(num)``. Each page body is passed
    through ``StripTags`` (defined elsewhere in this file; presumably it
    removes HTML markup -- verify) and scanned for dotted host/path-like
    tokens. A token is kept when it contains ``'www'``; it is trimmed so
    that it starts at ``'www'`` and is dropped if it contains ``'google'``.

    Args:
        query: Search expression. It is concatenated into the URL as-is,
            so the caller is responsible for any URL-encoding.
        num: Exclusive upper bound for the ``start`` offset; any value
            accepted by ``int()``.

    Returns:
        List of unique matched strings in first-seen order. Empty when
        ``int(num) <= 10`` because no page is requested in that case.

    Side effects:
        Prints a banner, the raw body of every fetched page, and finally
        each collected entry to stdout.
    """
    # Single-argument print(...) behaves identically under the Python 2
    # print statement and also parses on Python 3.
    print("[+] getting urls")
    urls = []
    limit = int(num)
    # NOTE(review): paging starts at offset 10, so the first results page
    # (start=0) is never requested -- confirm this is intended.
    if limit <= 10:
        return urls

    # Raw string keeps the regex escapes literal; compiled once for all pages.
    host_pattern = re.compile(r'\w+\.[\w\.\-/]*\.\w+')

    # build_opener() accepts handler objects only. The original passed the
    # URL string here, which add_handler() rejects with TypeError. The opener
    # does not depend on the page, so it is built once and reused.
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')]

    seen = set()  # O(1) duplicate check; ``urls`` preserves discovery order
    for counter in range(10, limit, 10):
        url = 'http://www.google.com/search?hl=en&q='+query+'&hl=en&lr=&start='+str(counter)+'&sa=N'
        response = opener.open(url)
        try:
            data = response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()
        # NOTE(review): dumps the whole response body; looks like leftover
        # debugging output -- kept because it is observable behaviour.
        print(data)
        for host in host_pattern.findall(StripTags(data)):
            www_at = host.find('www')
            if www_at == -1:
                continue
            host = host[www_at:]
            if host in seen or 'google' in host:
                continue
            seen.add(host)
            urls.append(host)

    for url in urls:
        print(url)

    return urls