def parse(self, response):
    """Extract movie entries from a listing page and follow the next page.

    Every ``<div class="info">`` node found in the response is turned into
    its own ``DoubanspiderItem`` carrying the ``title``, ``movieInfo``,
    ``star`` and ``quote`` fields. ``movieInfo`` is the extracted text
    fragments joined with ``';'``; the other fields hold the raw result of
    ``.extract()`` on their XPath.

    Args:
        response: The response object handed to this callback.

    Yields:
        One ``DoubanspiderItem`` per matched movie node, followed by a
        ``Request`` for the next page (with this method as callback) when
        a next-page link is present.
    """
    selector = Selector(response)

    for movie in selector.xpath('//div[@class="info"]'):
        title = movie.xpath(
            'div[@class="hd"]/a/span[@class="title"]/text()'
        ).extract()
        movie_info = movie.xpath('div[@class="bd"]/p/text()').extract()
        star = movie.xpath(
            'div[@class="bd"]/div[@class="star"]'
            '/span[@class="rating_num"]/text()'
        ).extract()
        quote = movie.xpath(
            'div[@class="bd"]/p[@class="quote"]/span/text()'
        ).extract()

        # Create a new item for each movie. Re-using one instance across
        # iterations would make every yielded result alias the same object,
        # so later movies would overwrite the fields of earlier ones for
        # any consumer that keeps a reference to them.
        item = DoubanspiderItem()
        item['title'] = title
        item['movieInfo'] = ';'.join(movie_info)
        item['star'] = star
        item['quote'] = quote
        # Hand the populated item over to the caller.
        yield item

    next_links = selector.xpath('//span[@class="next"]/link/@href').extract()
    if next_links:
        next_href = next_links[0]
        print(next_href)
        # NOTE(review): plain concatenation assumes the href is a fragment
        # that can be appended directly to self.url - confirm against the
        # spider's start URL.
        yield Request(self.url + next_href, callback=self.parse)
评论列表
文章目录