def parse(self, response):
    """Scrape the movie entries of one listing page and follow pagination.

    Yields one ``DoubanmovieItem`` per ``div.info`` element, populated with
    the keys ``title``, ``movieInfo``, ``star`` and ``quote``. If a
    next-page link is present, a ``Request`` for ``self.url`` plus that
    link is yielded with this method as its callback.
    """
    selector = Selector(response)
    for movie in selector.xpath('//div[@class="info"]'):
        # The title XPath may match several <span> text nodes; they are
        # concatenated into a single string.
        title_parts = movie.xpath('div[@class="hd"]/a/span/text()').extract()
        info_lines = movie.xpath('div[@class="bd"]/p/text()').extract()
        stars = movie.xpath(
            'div[@class="bd"]/div[@class="star"]/span/text()').extract()
        quotes = movie.xpath(
            'div[@class="bd"]/p[@class="quote"]/span/text()').extract()

        # Create a fresh item per movie so that items already yielded are
        # not overwritten by later iterations.
        item = DoubanmovieItem()
        item['title'] = ''.join(title_parts)
        item['movieInfo'] = ';'.join(info_lines)
        # Rating and quote can be absent; fall back to an empty string
        # rather than raising IndexError and losing the rest of the page.
        item['star'] = stars[0] if stars else ''
        item['quote'] = quotes[0] if quotes else ''
        yield item

    next_links = selector.xpath('//span[@class="next"]/link/@href').extract()
    # When no next-page link is found, nothing more is scheduled from here.
    if next_links:
        next_link = next_links[0]
        print(next_link)
        # Schedule the next listing page, parsed by this same callback.
        yield Request(self.url + next_link, callback=self.parse)
评论列表
文章目录