def parse(self, response):
    """Parse a listing page and schedule one detail request per entry.

    Python 2 Scrapy callback, written as a generator.

    * HTTP 403: sleep, then re-request the same URL with this callback.
    * HTTP 404: print a notice and yield nothing.
    * Anything else: probe list positions 1..30 at a fixed XPath. For every
      position that has a link, build a ``SoufangItem`` whose ``name`` and
      ``link`` are GBK-encoded and yield a request for that link, handled
      by ``self.parse_detail`` with the item passed in ``meta['item']``.

    :param response: downloaded page; only ``status``, ``url`` and the
        document (through ``scrapy.Selector``) are used here.
    :returns: generator of ``Request`` objects.

    NOTE(review): the original indentation of this function was lost.
    The per-entry print is assumed to sit inside the loop and the closing
    separator at function level -- confirm against the real source.
    """
    # Python 2 hack so implicit str/unicode conversions use UTF-8.
    # Applied only when the default encoding is not UTF-8 already, so that
    # ``sys`` is not reloaded on every single response.
    if sys.getdefaultencoding() not in ('utf8', 'utf-8'):
        reload(sys)
        sys.setdefaultencoding('utf8')

    print('__________')
    if response.status == 403:
        retry_delay = 1200  # seconds; the message below reports this value
        print('meet 403, sleep %d seconds' % retry_delay)
        import time
        # NOTE(review): time.sleep() blocks the calling thread; in a Scrapy
        # crawl this presumably stalls every in-flight request as well.
        # Consider a downloader-level delay/retry instead -- TODO confirm.
        time.sleep(retry_delay)
        # This URL has already been requested once, so without
        # dont_filter=True the duplicate filter may discard the retry.
        yield Request(response.url, callback=self.parse, dont_filter=True)
    elif response.status == 404:
        # 404: nothing to retry or parse for this page.
        print('meet 404,return')
    else:
        hxs = scrapy.Selector(response)
        # XPath of the anchor of the i-th list entry; %d is the 1-based index.
        anchor_xpath = '/html/body/div[4]/div[1]/ul/li[%d]/div[1]/div[1]/a'
        for i in range(1, 31):
            anchor = anchor_xpath % i
            name = ''.join(hxs.xpath(anchor + '/text()').extract())
            href = ''.join(hxs.xpath(anchor + '/@href').extract())
            if not href:
                # Position i has no link (e.g. the page lists fewer than 30
                # entries). Building a Request from an empty URL would raise
                # and abort the rest of the page, so skip it.
                continue
            item = SoufangItem()
            item['name'] = name.encode('gbk')
            item['link'] = href.encode('gbk')
            yield Request(href, callback=self.parse_detail, meta={'item': item})
            print('%s %s' % (name, href))
    print('__________')
评论列表
文章目录