def parseBegin(self, response):
    """Parse one Yelp-style search-result page.

    Yields one FoodItem per business listing found on the page, then
    follows the "next page" pagination link with the same meta payload.

    Expects ``response.meta['appends']`` to be a dict with keys
    ``'city'`` and ``'cat'`` supplied by the scheduling request.

    Raises:
        CloseSpider: when the server answers 503 (we are being blocked).
    """
    import ast  # local import: safe literal parsing, replaces eval() below

    if response.status == 503:
        # Remote server is throttling/denying us; abort the whole crawl.
        raise CloseSpider("denied by remote server")

    sel = Selector(response)
    appends = response.meta['appends']
    cityName = appends['city']
    category = appends['cat']

    # Per-listing location strings extracted from the raw page body.
    # NOTE(review): assumed to be parallel to the listing divs below —
    # one entry per listing, in document order.
    locations = self.getLocations(response.body)
    if not locations:
        # self.logger.error("location is []: %s\t%s", response.url, str(cityName))
        return

    div_a = sel.xpath('//li[@class="regular-search-result"]/div/div[@class="biz-listing-large"]')
    for ii, div in enumerate(div_a):
        main = div.xpath('./div[1]/div/div[2]/h3/span/a[@class="biz-name"]')
        url = main.xpath('./@href').extract()
        if not url or ii >= len(locations):
            # Listing without a link, or more listing divs than extracted
            # locations: skip this entry instead of raising IndexError.
            continue

        item = FoodItem()
        item['url'] = response.urljoin(url[0])
        item['name'] = main.xpath('./span/text()').extract()[0]

        second = div.xpath('./div[2]')
        address = second.xpath('./address').extract()
        region = second.xpath('./span[@class="neighborhood-str-list"]/text()').extract()
        item['address'] = self.filtertags(address[0]) if address else ""
        item['region'] = region[0].strip() if region else ""
        item['city'] = cityName.strip()
        item['category'] = category
        # locations[ii] is a Python-literal string scraped from the page.
        # ast.literal_eval parses literals only and cannot execute code,
        # unlike the eval() originally applied to this untrusted content.
        item['location'] = ast.literal_eval(locations[ii])
        yield item

    # NOTE(review): time.sleep blocks Scrapy's event loop; the idiomatic
    # throttle is the DOWNLOAD_DELAY setting. Kept to preserve the original
    # crawl pacing.
    time.sleep(1.0)

    nextPage = sel.xpath('//a[@class="u-decoration-none next pagination-links_anchor"]/@href').extract()
    if nextPage:
        nextLink = response.urljoin(nextPage[0])
        yield Request(url=nextLink, callback=self.parseBegin,
                      meta={'appends': appends}, dont_filter=True)
# (page-scrape residue, not code: "评论列表" = comment list, "文章目录" = article table of contents)