dianpingxmtspider.py 文件源码-python代码片段

dianpingxmtspider.py 文件源码

python

阅读 16 收藏 0 点赞 0 评论 0

项目：spider_scrapy_lianjia 作者: stamhe 项目源码文件源码

def parse(self, response):
        """Parse a category page and schedule one request per region link.

        The shop type and city id are taken from ``response.meta`` when the
        originating request supplied them; otherwise they are looked up in
        ``self.shop_type_map`` using the response URL as the key.

        Args:
            response: The downloaded page handed to this callback.

        Yields:
            scrapy.Request: One request per ``<a>`` found directly under
            ``<div id="region-nav">``, dispatched to ``self.parse_list`` with
            ``shop_type``, ``cat_url`` and ``city_id`` forwarded in ``meta``.

        Raises:
            KeyError: If a value is missing from ``response.meta`` and the
                response URL is not present in ``self.shop_type_map``.
        """
        sel = Selector(response)
        meta = response.meta

        # dict.has_key() no longer exists on Python 3; the ``in`` operator
        # behaves identically and works on both Python 2 and 3.
        if "shop_type" in meta:
            shop_type = meta['shop_type']
        else:
            shop_type = self.shop_type_map[response.url]['shop_type']

        if "city_id" in meta:
            city_id = meta['city_id']
        else:
            city_id = self.shop_type_map[response.url]['city_id']

        cat_url = response.url
        # 'proxy' is only present when the request was routed through a
        # proxy; use .get() so a direct request does not crash on logging.
        self.log("http_status = %s proxy = %s" % (response.status, meta.get('proxy')))

        self.log("shop_type = %s" % shop_type)
        for region_link in sel.xpath('//div[@id="region-nav"]/a'):
            hrefs = region_link.xpath('@href').extract()
            if not hrefs:
                # An anchor without href gives nothing to follow; skip it
                # rather than aborting the remaining links with IndexError.
                continue
            uri = hrefs[0]
            self.log("page_uri = %s" % uri)
            yield scrapy.Request(
                'http://www.dianping.com' + uri,
                callback=self.parse_list,
                meta={'shop_type': shop_type, 'cat_url': cat_url, 'city_id': city_id},
            )