def gHarv(dork,site,dP,cxe,output,gnum,maxcount):
    """Harvest result URLs from a Google Custom Search query.

    Pages through ``http://www.google.com/cse`` results, extracts every
    ``href="..."`` value from each page, filters out unwanted links and
    stores the remainder in the global ``GoogleURLS`` list.  Afterwards the
    collected URLs are reduced into the global ``tmplist`` (one URL per
    distinct "path + parameter names" signature, tracked in
    ``ParmURLS_List``) and a summary is written to ``txtField``.

    Parameters:
        dork, site, dP: string fragments joined into the search query as
            ``dP + dork + '+site:' + site``.
        cxe: key into ``CXdic`` selecting the ``cx`` value for the request.
        output: when equal to 1, every accepted URL is also inserted into
            ``txtField`` as it is found.
        gnum: results requested per page (``num=``); also the step added
            to the ``start=`` offset after each page.  Converted with int().
        maxcount: paging stops once the offset reaches int(maxcount).

    Returns:
        None.  Results are delivered through the globals ``GoogleURLS``,
        ``tmplist``, ``ParmURLS_List`` and ``statList``; ``gcount`` is
        incremented once per call.

    An IOError raised anywhere inside the harvest is swallowed; in that
    case the statements following the failing one (button re-enable,
    de-duplication, summary output) are skipped.
    """
    global GoogleURLS, tmplist, gcount
    gcount += 1
    GoogleURLS = []
    counter = 0
    try:
        CXr = CXdic[cxe]
        header = 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6)'
        href_re = re.compile('(?<=href=")(.*?)(?=")')
        # A link is dropped when any of these patterns matches it.  Note
        # that ".gov" is a regex, so the dot matches any single character.
        skip_patterns = (
            r'\(', "<", r"\A/", r"\A(http://)\d",
            "google", "youtube", ".gov", "blackle",
        )
        query = dP + dork + '+site:' + site
        gnum = int(gnum)
        limit = int(maxcount)

        while counter < limit:
            # A fresh cookie jar is used for every page request.
            jar = cookielib.FileCookieJar("cookies")
            results_web = ('http://www.google.com/cse?cx=' + CXr
                           + '&q=' + query
                           + '&num=' + repr(gnum)
                           + '&hl=en&lr=&ie=UTF-8&start=' + repr(counter)
                           + '&sa=N')
            request_web = urllib2.Request(results_web)
            # Send the complete User-Agent string.  random.choice() on a
            # str returns one character, which is not a usable agent.
            request_web.add_header('User-Agent', header)
            opener_web = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
            text = opener_web.open(request_web).read()

            for name in href_re.findall(text):
                if name in GoogleURLS:
                    continue
                if any(re.search(pattern, name) for pattern in skip_patterns):
                    continue
                if output == 1:
                    txtField.insert(END, name + '\n')
                GoogleURLS.append(name)

            # Pause for a randomly chosen interval from rSA between pages.
            sleeptimer = random.choice(rSA)
            time.sleep(sleeptimer)
            counter += gnum
            percent = int((1.0 * counter / limit) * 100)
            laststatstring = ('Current MaxCount : ' + repr(counter)
                              + ' | Last Query Sleeptimer (' + repr(sleeptimer)
                              + ') | Percent Done : ' + repr(percent))
            statList.append(laststatstring)
            modStatus()

        TestHost_bttn.configure(state=NORMAL, fg=fgCol)

        # Kept as an equality test so only True (or 1) enables this branch.
        if iC == True:
            for entry in GoogleURLS:
                if '=' in entry:
                    tmplist.append(entry)

        # NOTE(review): this function was recovered from text that had lost
        # its indentation.  The de-duplication pass below is assumed to run
        # regardless of iC -- confirm against the original layout.
        for url in GoogleURLS:
            parts = url.split('?')
            if len(parts) < 2:
                # No query string, so there is nothing to build a signature from.
                continue
            # Signature = path + concatenated parameter names (values ignored).
            param_names = ''.join(pair.split('=')[0]
                                  for pair in parts[1].split('&'))
            parmURL = parts[0] + param_names
            if parmURL not in ParmURLS_List and url not in tmplist:
                ParmURLS_List.append(parmURL)
                tmplist.append(url)

        tmplist.sort()
        txtField.insert(END,'\nFound URLS: '+repr(len(GoogleURLS))+'\t\tTotal Parm-dupe Checked URLS: '+repr(len(tmplist)))
        txtField.insert(END,'\nGoogle Search Finished...\n')
    except IOError:
        # Best-effort: an I/O failure ends the harvest silently.
        pass
评论列表
文章目录