def _urlQuery(self, urlInput):
    """Search urlquery.net for ``urlInput`` and scrape the result listing.

    The search page is fetched over HTTP and parsed with regular
    expressions. If the page contains the "0 results returned" marker the
    URL is treated as not listed; otherwise ``self.hitcount`` is
    incremented and one entry is built per scraped table row.

    Args:
        urlInput: The value substituted into the ``q=`` parameter of the
            urlquery.net search URL.

    Returns:
        A dict with two keys:

        * ``'url'`` -- the ``urlInput`` value, unchanged.
        * ``'urlResult'`` -- the string ``'NOT listed'`` when no reports
          were found, otherwise a list of dicts with the keys ``datum``,
          ``alerts_ids``, ``country``, ``reportUrl`` and ``report``.

    Errors raised by ``urllib2.urlopen`` or by reading the response are
    not caught here and propagate to the caller.
    """
    # Kept from the original. Note that the request below is made with
    # urllib2, so this httplib2 setting does not affect it directly.
    httplib2.debuglevel = 4
    url = "http://urlquery.net/%s"
    # NOTE(review): urlInput is placed into the query string unescaped; a
    # value containing '&' or '#' would be split by the server. Confirm
    # whether callers pre-encode it before adding quoting here.
    action_search = url % "search.php?q=%s" % urlInput

    conn = urllib2.urlopen(action_search, timeout=60)
    try:
        content2String = conn.read()
    finally:
        # Always release the connection, even if read() fails.
        conn.close()

    if re.search(r'.* 0\sresults\sreturned*', content2String, re.IGNORECASE):
        log.debug('urlquery Reports NOT found')
        return {'url': urlInput, 'urlResult': 'NOT listed'}

    # Reports found
    log.debug('urlquery Reports found')
    self.hitcount += 1

    def scrape(pattern):
        # Return every capture of ``pattern`` in the page, case-insensitively.
        return re.findall(pattern, content2String, re.IGNORECASE)

    rpdFindReport = scrape(r"\shref='(.*?)'\>")
    rpdFindReportUrl = scrape(r"\<td\>\<a\stitle='(.*?)'\shref='report.php")
    rpdFindAlertsIDS = scrape(r"\<td\salign='center'\>\<b\>(.*?)\<\/b\>\<\/td\>")
    rpdFindDatum = scrape(
        r"\<td\>\<nobr\>\<center\>(.*?)\<\/center\>\<\/nobr\>\<\/td\>")
    rpdFindLand = scrape(
        r"align='left'\stitle='(.*?)'\swidth='\d{2}'\sheight='\d{2}'\s/>")

    # The five lists are matched up by position. zip() stops at the
    # shortest one, so a page where one pattern matched fewer times yields
    # fewer rows instead of raising IndexError.
    urlqueryResults = []
    for datum, alerts, land, reportUrl, report in zip(
            rpdFindDatum, rpdFindAlertsIDS, rpdFindLand,
            rpdFindReportUrl, rpdFindReport):
        urlqueryResults.append({
            "datum": datum,
            "alerts_ids": alerts,
            "country": land,
            "reportUrl": convertDirtyDict2ASCII(reportUrl),
            # Captured hrefs are joined onto the site root.
            "report": url % report,
        })

    return {'url': urlInput, 'urlResult': urlqueryResults}
评论列表
文章目录