dianpingxmtbabyspider.py 文件源码-python代码片段

dianpingxmtbabyspider.py 文件源码

python

阅读 17 收藏 0 点赞 0 评论 0

项目：spider_scrapy_lianjia 作者: stamhe 项目源码文件源码

def parse_category(self, response):
    """Follow every shop-list link found on a category page.

    Used as a Scrapy callback: reads ``shop_type`` and ``city_id`` from
    ``response.meta``, logs the URL, HTTP status and proxy of the response,
    then selects the ``<li>`` entries of the district box and schedules one
    request per link.

    Args:
        response: The downloaded category page. Its ``meta`` must contain
            ``shop_type`` and ``city_id``; ``proxy`` is optional and is
            only used for logging.

    Yields:
        scrapy.Request: One request per link, handled by
        ``self.parse_list``. Its ``meta`` carries ``shop_type``,
        ``cat_url`` (the URL of this category page) and ``city_id``.

    Raises:
        KeyError: If ``shop_type`` or ``city_id`` is missing from
            ``response.meta``.
    """
    self.log("=================================================")
    sel = Selector(response)
    shop_type = response.meta['shop_type']
    city_id = response.meta['city_id']

    cat_url = response.url
    http_status = response.status
    self.log("http_url = %s" % cat_url)
    # The proxy key is absent when the request was sent directly; a missing
    # key must not abort parsing just because of a log line.
    self.log("http_status = %s proxy = %s" % (http_status, response.meta.get('proxy')))

    self.log("shop_type = %s" % shop_type)
    shop_list = sel.xpath('//li[@class="t-item-box t-district J_li"]/div[@class="t-item"]/div[@class="t-list"]/ul/li')
    self.log("shop_list_len = %d" % len(shop_list))
    for shop in shop_list:
        hrefs = shop.xpath('a/@href').extract()
        if not hrefs:
            # An entry without a link would raise IndexError and drop every
            # remaining link on the page; skip it instead.
            continue
        uri = hrefs[0]
        self.log("page_uri = %s" % uri)
        yield scrapy.Request(
            'http://www.dianping.com' + uri,
            callback=self.parse_list,
            meta={'shop_type': shop_type, 'cat_url': cat_url, 'city_id': city_id},
        )