MovieSpider.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:crawler 作者: Yabea 项目源码 文件源码
def parse(self, response):
    """Extract movie items from a listing page, then follow the next page.

    For every ``div[@class="info"]`` node in the response, a fresh
    ``DoubanmovieItem`` is filled with ``title``, ``movieInfo``, ``star``
    and ``quote`` and yielded. If the page contains a next-page link, a
    ``Request`` for ``self.url + <link>`` is yielded with this method as
    its callback.

    Args:
        response: The downloaded page; it is wrapped in a ``Selector``.

    Yields:
        One ``DoubanmovieItem`` per movie node, followed by at most one
        ``Request`` for the next page.
    """
    selector = Selector(response)

    for eachmovie in selector.xpath('//div[@class="info"]'):
        title_parts = eachmovie.xpath(
            'div[@class="hd"]/a/span/text()').extract()
        movieInfo = eachmovie.xpath('div[@class="bd"]/p/text()').extract()
        stars = eachmovie.xpath(
            'div[@class="bd"]/div[@class="star"]/span/text()').extract()
        quotes = eachmovie.xpath(
            'div[@class="bd"]/p[@class="quote"]/span/text()').extract()

        # Build a new item for each movie: re-using a single instance would
        # make every yielded item the same object, overwritten on each pass.
        item = DoubanmovieItem()
        # The title is spread over several <span> nodes; concatenate them.
        item['title'] = ''.join(title_parts)
        item['movieInfo'] = ';'.join(movieInfo)
        # Rating and quote may be absent; fall back to an empty string
        # instead of raising IndexError on an empty result list.
        item['star'] = stars[0] if stars else ''
        item['quote'] = quotes[0] if quotes else ''

        yield item

    # The attribute is "href"; the previous "herf" spelling never matched,
    # so pagination silently stopped after the first page.
    nextlink = selector.xpath('//span[@class="next"]/link/@href').extract()
    if nextlink:
        nextlink = nextlink[0]
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(nextlink)
        # Schedule the next page and parse it with this same method.
        yield Request(self.url + nextlink, callback=self.parse)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号