def parse(self, response):
    """Extract the movie fields of one listing page into a single item.

    Each populated field of the yielded ``DoubanTopMoviesItem`` is the list
    returned by ``.extract()`` for its XPath, i.e. one entry per matching
    node on the page (not one item per movie).

    Args:
        response: Object exposing ``.xpath()``; presumably the Scrapy
            response for a Douban Top-250 page -- confirm against caller.

    Yields:
        One ``DoubanTopMoviesItem`` with ``title_ch``, ``rating_num``,
        ``rating_count``, ``image_urls`` and ``topid`` filled in.
    """
    item = DoubanTopMoviesItem()
    item['title_ch'] = response.xpath(
        '//div[@class="hd"]//span[@class="title"][1]/text()').extract()

    # NOTE(review): the original explanation here was lost to an encoding
    # error. Presumably these fields are disabled because not every entry
    # has a second "title" span / "other" span, which would leave the
    # per-field lists with different lengths -- confirm before re-enabling.
    # en_list = response.xpath('//div[@class="hd"]//span[@class="title"][2]/text()').extract()
    # item['title_en'] = [en.replace('\xa0/\xa0','').replace(' ','') for en in en_list]
    # ht_list = response.xpath('//div[@class="hd"]//span[@class="other"]/text()').extract()
    # item['title_ht'] = [ht.replace('\xa0/\xa0','').replace(' ','') for ht in ht_list]
    # detail_list = response.xpath('//div[@class="bd"]/p[1]/text()').extract()
    # item['detail'] = [detail.replace(' ', '').replace('\xa0', '').replace('\n', '') for detail in detail_list]
    # NOTE(review): original comment unreadable; presumably some entries
    # have no quote node, so this list would not line up with the others.
    # item['quote'] = response.xpath('//span[@class="inq"]/text()').extract()

    item['rating_num'] = response.xpath(
        '//div[@class="star"]/span[2]/text()').extract()

    # The fourth span's text embeds the vote count inside other characters;
    # keep only the first run of digits from each one.
    count_list = response.xpath(
        '//div[@class="star"]/span[4]/text()').extract()
    rating_counts = []
    for count in count_list:
        digits = re.search(r'\d+', count)
        # A span without digits yields '' instead of raising IndexError, so
        # the list keeps one entry per span and the page is still emitted.
        rating_counts.append(digits.group() if digits else '')
    item['rating_count'] = rating_counts

    item['image_urls'] = response.xpath(
        '//div[@class="pic"]/a/img/@src').extract()
    item['topid'] = response.xpath('//div[@class="pic"]/em/text()').extract()
    yield item

    # Disabled: follow the page's rel="next" link and parse it with this
    # same callback (original comment text lost to an encoding error).
    # new_url = response.xpath('//link[@rel="next"]/@href').extract_first()
    # if new_url:
    #     next_url = self.base_url+new_url
    #     yield scrapy.Request(next_url, callback=self.parse)
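######------- Alternative: start_urls + LinkExtractor (heading reconstructed after encoding loss -- confirm) --------#####
# from scrapy.spiders import CrawlSpider, Rule
# from scrapy.linkextractors import LinkExtractor
# class SpDoubanSpider(CrawlSpider):
# ...
# NOTE(review): original comment unreadable (encoding loss). The rule below
# follows links matching the Top-250 page URL pattern and passes each
# response to parse_item.
# rules = [Rule(LinkExtractor(allow=(r'https://movie.douban.com/top250\?start=\d+.*')),
# callback='parse_item', follow=True)
# ]
# def parse_item(self, response):
# # item extraction goes here (presumably the same as in parse -- confirm)
# yield item
######------- End of alternative: start_urls + LinkExtractor --------#####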