def parse(self, response):
    """Yield requests for every detail link on a listing page, then paginate.

    Detail links are the ``href`` attributes of ``div.listimg a`` elements
    inside the ``div.listBox ul li`` entries; each one is resolved against
    the response URL and scheduled with ``self.parse_movie`` as callback.
    Afterwards the ``div.pagebox`` anchor whose text contains
    ``next_page_str`` is followed, if present, with this method as callback.

    Args:
        response: The listing-page response handed to this callback.

    Yields:
        ``scrapy.Request`` objects for the detail pages and, when a matching
        pagination link exists, for the next listing page.
    """
    # Imported locally so the block does not depend on a module-level import.
    import logging

    logger = logging.getLogger(__name__)

    entries = response.css('div.listBox ul li ')
    hrefs = entries.css('div.listimg a::attr(href)').extract()

    for href in hrefs:
        # Only request construction is guarded: a malformed link is skipped
        # (and reported) so the remaining links and pagination still run.
        try:
            request = scrapy.Request(response.urljoin(href),
                                     callback=self.parse_movie)
        except Exception as exc:
            logger.warning("Skipping detail link %r: %s", href, exc)
            continue
        # Yield outside the try block so that an exception raised at the
        # yield point is not mistaken for a bad link and silently dropped.
        yield request

    # NOTE(review): this literal looks encoding-damaged (the comments that
    # originally accompanied this code were damaged the same way).
    # Presumably it was the site's "next page" link text -- confirm against
    # the target page and restore it, otherwise pagination will not match.
    next_page_str = u'???'
    rex = '//div[@class="pagebox"]/a[contains(text(), "%s")]/@href' % next_page_str
    next_page = response.xpath(rex).extract_first()

    # extract_first() returns None when no anchor matched, i.e. there is no
    # further listing page to follow.
    if next_page is not None:
        yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
评论列表
文章目录