yfood.py 文件源码

python
阅读 18 收藏 0 点赞 0 评论 0

项目:YelpCrawlSpider 作者: yjp999 项目源码 文件源码
def parse(self, response):
        """Yield the follow-up search requests for a city/category page.

        Reads ``response.meta['appends']`` (keys ``'city'`` and ``'cat'``) and
        yields one ``Request`` per search URL. Every request is handled by
        ``self.parseBegin``, carries the same ``appends`` dict in its meta and
        is sent with ``dont_filter=True``.

        For three cities the search URLs are hard-coded and built from
        ``self.cat``; for any other city the URL comes from the page's
        "Search for more <cat>" link. If that link is absent, a warning is
        logged and nothing is yielded for this response.

        Raises:
            CloseSpider: if the response status is 503.
        """
        if response.status == 503:
            raise CloseSpider("denied by remote server")

        appends = response.meta['appends']
        cityname = appends['city']
        smexp = appends['cat']

        # Cities whose search locations are fixed instead of being scraped
        # from the page; values are the ``find_loc`` query values, in the
        # order the requests are issued.
        # NOTE(review): the '??' / '???' literals look like non-ASCII text
        # that was lost in an encoding round-trip -- confirm against the
        # original source before relying on the first entry.
        fixed_locations = {
            '??': (
                'Hong+Kong',
                '???%2C+Hong+Kong',
            ),
            'Adelaide': (
                'Adelaide%2C+Adelaide+South+Australia%2C+Australia',
                'Adelaide+South+Australia+5000',
            ),
            'Park La Brea': (
                'South+La+Brea+Avenue%2C+Los+Angeles%2C+CA+90056',
                'Mid-Wilshire%2C+Los+Angeles%2C+CA',
                'North+La+Brea+Avenue%2C+Los+Angeles%2C+CA',
            ),
        }

        locations = fixed_locations.get(cityname)
        if locations is not None:
            # NOTE(review): the hard-coded URLs use self.cat while the XPath
            # below uses appends['cat'] -- presumably different identifiers
            # for the same category; verify.
            prefix = 'http://www.yelp.com/search?cflt=' + self.cat + '&find_loc='
            moreLink = [prefix + location for location in locations]
        else:
            # The page is only parsed on this path; the hard-coded cities
            # never need a Selector.
            xpath_exp = '//a[text()="Search for more ' + smexp + '"]/@href'
            hrefs = Selector(response).xpath(xpath_exp).extract()
            if not hrefs:
                # Previously this indexed [0] unconditionally and died with a
                # bare IndexError; report the missing link explicitly instead.
                import logging
                logging.getLogger(__name__).warning(
                    'No "Search for more %s" link found on %s; skipping',
                    smexp, response.url)
                return
            moreLink = [response.urljoin(hrefs[0])]

        for link in moreLink:
            yield Request(url=link, callback=self.parseBegin,
                          meta={'appends': appends}, dont_filter=True)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号