def get_data(self, search_query):
    '''Scrape Google image search and return the image urls found for a query.

    Requests ``https://www.google.com/search`` with image-search parameters
    and a Windows Phone / IEMobile user agent, then collects the ``imgurl``
    value from every link inside the ``<div id="images">`` element of the
    returned page.

    Args:
        search_query: text sent to Google as the ``q`` parameter.

    Returns:
        list of image url strings in document order. The list is empty when
        the request fails (the exception is logged, not raised) or when the
        page contains no matching links.
    '''
    # NOTE(review): "tbs": "isz:l" presumably restricts results to large
    # images - confirm against Google's (undocumented) search parameters.
    params = {"site": "imghp", "tbm": "isch", "tbs": "isz:l", "q": search_query}
    # Adjacent literals instead of a backslash continuation inside the string,
    # so source indentation can never leak into the header value.
    # NOTE(review): the parsing below presumably depends on the markup served
    # to this old mobile user agent - confirm before changing it.
    headers = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows Phone OS 7.0; Trident/3.1; '
                             'IEMobile/7.0; LG; GW910)'}
    try:
        html = requests.get('https://www.google.com/search', headers=headers, params=params, timeout=5).text
    except Exception as exc:
        # Best effort: a failed lookup is logged and yields no results.
        log_exception(__name__, exc)
        return []
    soup = BeautifulSoup.BeautifulSoup(html)
    results = []
    for div in soup.findAll('div'):
        if div.get("id") != "images":
            continue
        for a_link in div.findAll("a"):
            page = a_link.get("href")
            # Skip anchors without a href and links that carry no image url;
            # otherwise the whole href would end up in the results.
            if not page or "imgurl=" not in page:
                continue
            # The image url sits between "imgurl=" and "&imgrefurl=".
            img = page.split("imgurl=")[-1].split("&imgrefurl=")[0]
            if img:
                results.append(img)
    return results
评论列表
文章目录