def search(self, search_term):
    """Run a Google search and return the result links found on the page.

    The page is fetched with ``self.connection.get`` and the section between
    ``<div id="res">`` and ``<div id="foot">`` is scanned for
    ``<a href="/url?q=...&sa=U&ei=`` anchors.  Each target is URL-unquoted
    and kept only if it starts with ``http``.  When ``self.restrict_to`` is
    non-empty, only links containing at least one of its entries are
    returned, each link once, in page order.

    Args:
        search_term: The query string sent as the ``q`` parameter.

    Returns:
        A list of result URLs in the order they appear on the page, or
        ``None`` when the fetch returns nothing or the page has no results
        section.
    """
    # Perform the search and get the text of the page.
    # NOTE(review): assumes connection.get returns the page body as a
    # string (or something falsy on failure) -- confirm against the
    # connection class.
    params = {'q': search_term,
              'btnG': 'Google Search'}
    text = self.connection.get(GoogleSearch.google_url, params)
    if not text:
        return None

    # Isolate the results section of the page.
    section_start = text.find('<div id="res">')
    section_end = text.find('<div id="foot">')
    if section_end == -1:
        # No footer marker: scan to the end of the page instead of letting
        # a -1 slice bound silently drop the last character.
        section_end = len(text)
    if section_start == -1 or section_start >= section_end:
        self.logger.warning("No results for `{}`".format(search_term))
        return None
    results = text[section_start:section_end]

    # Pull out the links of results.
    prefix = '<a href="/url?q='
    suffix = '&sa=U&ei='
    links = []
    pos = 0
    while True:
        anchor = results.find(prefix, pos)
        if anchor == -1:
            break
        link_start = anchor + len(prefix)
        link_end = results.find(suffix, link_start)
        if link_end == -1:
            # Unterminated anchor: nothing further can be parsed.
            break
        link = unquote(results[link_start:link_end])
        # Skip relative or otherwise non-HTTP targets.
        if link.startswith('http'):
            links.append(link)
        pos = link_end

    # If necessary, filter the links based on content.
    if self.restrict_to:
        # De-duplicate with a "seen" set rather than list(set(...)) so the
        # ranking order of the results is preserved.
        seen = set()
        filtered_links = []
        for link in links:
            if link in seen:
                continue
            if any(domain in link for domain in self.restrict_to):
                seen.add(link)
                filtered_links.append(link)
        links = filtered_links
    return links
评论列表
文章目录