def parse_category(self, response):
    """Follow every sub-category link found on a category page.

    ``response.meta`` must contain ``'shop_type'`` and ``'city_id'``;
    both are forwarded unchanged to the follow-up requests together with
    the URL of this page (as ``'cat_url'``).

    Yields one ``scrapy.Request`` per link, with ``self.parse_list`` as
    the callback. List entries that carry no ``a/@href`` are skipped.
    """
    self.log("=================================================")
    sel = Selector(response)
    shop_type = response.meta['shop_type']
    city_id = response.meta['city_id']
    cat_url = response.url
    http_status = response.status
    self.log("http_url = %s" % cat_url)
    # The proxy is only reported for diagnostics; a request that did not go
    # through a proxy must not abort the callback with a KeyError.
    self.log("http_status = %s proxy = %s" % (http_status, response.meta.get('proxy')))
    self.log("shop_type = %s" % shop_type)
    shop_list = sel.xpath('//li[@class="t-item-box t-district J_li"]/div[@class="t-item"]/div[@class="t-list"]/ul/li')
    self.log("shop_list_len = %d" % len(shop_list))
    for shop in shop_list:
        hrefs = shop.xpath('a/@href').extract()
        if not hrefs:
            # An entry without a link would raise IndexError on [0] and,
            # since this is a generator, drop all remaining entries too.
            self.log("skip list entry without href")
            continue
        uri = hrefs[0]
        self.log("page_uri = %s" % uri)
        yield scrapy.Request(
            'http://www.dianping.com' + uri,
            callback=self.parse_list,
            meta={'shop_type': shop_type, 'cat_url': cat_url, 'city_id': city_id},
        )
dianpingxmtbabyspider.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录