def parse(self, response):
    """Schedule the category search page(s) for the city in ``response.meta``.

    Reads ``response.meta['appends']`` (a mapping with at least the keys
    ``'city'`` and ``'cat'``) and yields one ``Request`` per search URL, each
    handled by ``self.parseBegin`` and carrying the same ``appends`` mapping.

    For a few cities the search URLs are hard-coded and built from
    ``self.cat``; for every other city the URL is taken from the page's
    "Search for more <cat>" anchor and resolved against the response URL.

    Raises:
        CloseSpider: if the server answered with HTTP 503.
    """
    if response.status == 503:
        raise CloseSpider("denied by remote server")

    appends = response.meta['appends']
    cityname = appends['city']
    smexp = appends['cat']

    # ``find_loc`` query values for cities whose search URLs are hard-coded
    # instead of being read from the page.
    # NOTE(review): '??' and '???' look like mis-encoded non-ASCII text; they
    # are kept exactly as found -- confirm the intended city names.
    fixed_locations = {
        '??': (
            'Hong+Kong',
            '???%2C+Hong+Kong',
        ),
        'Adelaide': (
            'Adelaide%2C+Adelaide+South+Australia%2C+Australia',
            'Adelaide+South+Australia+5000',
        ),
        'Park La Brea': (
            'South+La+Brea+Avenue%2C+Los+Angeles%2C+CA+90056',
            'Mid-Wilshire%2C+Los+Angeles%2C+CA',
            'North+La+Brea+Avenue%2C+Los+Angeles%2C+CA',
        ),
    }

    locations = fixed_locations.get(cityname)
    if locations is not None:
        # self.cat is only read on this path, as in the original branches.
        base = 'http://www.yelp.com/search?cflt=' + self.cat + '&find_loc='
        more_links = [base + location for location in locations]
    else:
        # NOTE(review): smexp is spliced into the XPath literal unescaped; a
        # category containing a double quote would produce an invalid XPath.
        xpath_exp = '//a[text()="Search for more ' + smexp + '"]/@href'
        searchmore = response.xpath(xpath_exp).extract_first()
        if searchmore is None:
            # Previously ``extract()[0]`` raised IndexError here; a page
            # without the link now just produces no follow-up request.
            self.logger.warning(
                "No 'Search for more %s' link found on %s (city=%r)",
                smexp, response.url, cityname,
            )
            return
        more_links = [response.urljoin(searchmore)]

    for link in more_links:
        yield Request(
            url=link,
            callback=self.parseBegin,
            meta={'appends': appends},
            dont_filter=True,
        )
评论列表
文章目录