def parse_ph_key(self, response):
    """Extract video view keys from a listing page and follow pagination.

    For every ``<div class="phimage">`` in the response, the first
    ``viewkey=...`` value found in the div's markup is turned into a
    request for the matching embed page, handled by ``self.parse_ph_info``.
    If the page contains a "Next " button, a request for that page is
    yielded with this method as its callback again.

    Args:
        response: The downloaded listing page.

    Yields:
        Request: One request per view key found, followed by at most one
        request for the next listing page.
    """
    selector = Selector(response)
    logging.debug('request url:------>%s', response.url)

    # Compile once; the same pattern is applied to every thumbnail div.
    viewkey_pattern = re.compile(r'viewkey=(.*?)"')
    for div in selector.xpath('//div[@class="phimage"]'):
        match = viewkey_pattern.search(div.extract())
        if match is None:
            # Indexing an empty match list here used to raise IndexError,
            # which aborted the generator and dropped the remaining video
            # requests and the pagination request. Skip the div instead.
            logging.debug('no viewkey found in phimage div, skipping')
            continue
        yield Request(url='https://www.pornhub.com/embed/%s' % match.group(1),
                      callback=self.parse_ph_info)

    url_next = selector.xpath(
        '//a[@class="orangeButton" and text()="Next "]/@href').extract()
    logging.debug(url_next)
    if url_next:
        # NOTE(review): assumes the href is a path relative to self.host --
        # confirm against the site's markup.
        next_page = self.host + url_next[0]
        logging.debug(' next page:---------->%s', next_page)
        yield Request(url=next_page, callback=self.parse_ph_key)
# 评论列表 (comment list)
# 文章目录 (table of contents)