def parse(self, response):
    """
    Override function of the class scrapy.Spider. Called when response is obtained.

    Walks every anchor ``href`` on the (Google search) results page, keeps
    the redirect links of the form ``/url?q=<target>&...`` that
    ``is_standard_website`` accepts, and schedules a request for each target.

    :param response: Response object used to get the details of the webpage
    :return: generator of ``scrapy.Request`` objects, one per accepted search
        result, each handled by ``self.parse_result_contents``
    """
    # Function-scope import so this block does not depend on the file's import list.
    from urllib.parse import unquote

    redirect_prefix = '/url?q='
    for href in response.xpath("//a/@href").extract():
        # Keep only the search-result redirect links; everything else on the
        # page (navigation, settings, ...) is ignored.
        if not href.startswith(redirect_prefix):
            continue
        if not is_standard_website(href):
            continue

        # The target is the value of the `q` parameter: everything after the
        # prefix up to the next `&`. It is percent-encoded inside the redirect
        # link, so decode it to recover the real URL (e.g. `%3F` -> `?`).
        url = unquote(href[len(redirect_prefix):].split('&')[0])
        if not url:
            # An empty target would make scrapy.Request raise ValueError and
            # abort the remaining results; skip it instead.
            continue

        # Start another request for each search result URL; the
        # download_maxsize meta key limits the response to 2 MiB (2097152 bytes).
        yield scrapy.Request(
            url,
            meta={'download_maxsize': 2097152},
            callback=self.parse_result_contents,
        )
评论列表
文章目录