# dorkScan.py -- Python source code snippet

def cxeSearch(go_inurl, go_site, go_cxe, go_ftype, maxc):
    """Collect result links from a Google Custom Search Engine query.

    Result pages are fetched from ``http://www.google.com/cse`` while the
    ``start`` offset is below ``maxc``. Every ``href="..."`` value found in
    the returned HTML is collected, filtered, and finally reduced to at most
    one link per host.

    Relies on names defined outside this function: ``cookielib``,
    ``urllib2``, ``random``, ``re``, and the module-level ``gnum`` (sent as
    the ``num`` parameter) and ``header`` (a sequence from which one entry
    is picked at random and sent as the ``User-Agent``).

    Args:
        go_inurl: First term of the ``q=`` parameter.
        go_site: Second term of the ``q=`` parameter.
        go_cxe: Text inserted verbatim right after ``cse?`` -- presumably a
            ready-made ``cx=<engine id>`` fragment; confirm against callers.
        go_ftype: Third term of the ``q=`` parameter.
        maxc: Upper bound for the ``start`` offset; converted with ``int()``.

    Returns:
        A list of links containing ``=``, at most one per host, in the order
        they were first seen.

    Raises:
        ValueError: If ``maxc`` cannot be converted to an integer.
        Network errors raised while opening the URL are not caught here.
    """
    # Loop-invariant work is done once up front.
    href_re = re.compile(r'(?<=href=")(.*?)(?=")')
    numeric_host_re = re.compile(r"\Ahttp://\d")
    # NOTE(review): the dot is unescaped, so this matches ANY character
    # followed by "gov" (e.g. "mygov.com"), not only a literal ".gov".
    # Kept as-is so the set of rejected links does not change.
    gov_re = re.compile(r".gov")
    limit = int(maxc)
    # The terms are concatenated as given; no URL-encoding is applied.
    query = 'q=' + go_inurl + '+' + go_site + '+' + go_ftype

    uRLS = []
    seen_links = set()  # O(1) duplicate check; uRLS keeps discovery order
    counter = 0
    while counter < limit:
        # A fresh cookie jar is used for every request, as before.
        jar = cookielib.FileCookieJar("cookies")
        results_web = ('http://www.google.com/cse?' + go_cxe + '&' + query
                       + '&num=' + str(gnum)
                       + '&hl=en&lr=&ie=UTF-8&start=' + str(counter)
                       + '&sa=N')
        request_web = urllib2.Request(results_web)
        request_web.add_header('User-Agent', random.choice(header))
        opener_web = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
        response = opener_web.open(request_web)
        try:
            text = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()

        # The offset advances by a fixed 100 regardless of ``gnum``.
        counter += 100

        for name in href_re.findall(text):
            if name in seen_links:
                continue
            seen_links.add(name)
            # Reject markup/script fragments, relative links and URLs whose
            # host starts with a digit.
            if ("(" in name or "<" in name or name.startswith("/")
                    or numeric_host_re.search(name)):
                continue
            # Reject search-engine/video links, the ".gov" pattern above and
            # anything containing a percent sign.
            if ("google" in name or "youtube" in name
                    or gov_re.search(name) or "%" in name):
                continue
            uRLS.append(name)

    # Single-argument print(...) emits the same bytes as the original
    # Python 2 print statement and also parses on Python 3.
    print("[+] URLS (unsorted) : %d" % len(uRLS))

    finalList = []
    seen_domains = set()
    for entry in uRLS:
        parts = entry.split("/", 3)
        if len(parts) < 3:
            # No "scheme://host" shape, so there is no host to key on.
            continue
        domain = parts[2]
        # A host is only marked as seen once a link with "=" was kept for
        # it, so a later link on the same host can still qualify.
        if domain not in seen_domains and "=" in entry:
            finalList.append(entry)
            seen_domains.add(domain)

    # Despite the label, nothing is sorted: this is the per-host reduction.
    print("[+] URLS (sorted)   : %d" % len(finalList))
    return finalList