def parse(self, response):
    """Yield a request for every item linked from a listing page.

    Item links are read from ``//h4[@class="title"]/a/@href`` and each one
    is scheduled with ``self.parse_item`` as its callback.  Links found
    inside the ``page-ctrl ctrl-app`` div (presumably the pager; confirm
    against the site markup) are only printed: the request that would
    follow them is still commented out below.

    Args:
        response: The response for the listing page; it must be accepted
            by ``Selector`` and provide ``urljoin``.

    Yields:
        One ``scrapy.Request`` per item link.
    """
    page = Selector(response)

    for href in page.xpath('//h4[@class="title"]/a/@href').extract():
        # Resolve against the page URL, exactly as the second loop does;
        # scrapy.Request rejects a URL that has no scheme.
        yield scrapy.Request(response.urljoin(href), callback=self.parse_item)

    pager = page.xpath('//div[@class="page-ctrl ctrl-app"]')
    for href in pager.xpath('.//a/@href').extract():
        url = response.urljoin(href)
        # Parenthesised form prints the same on Python 2 and is valid on 3.
        print(url)
        # yield scrapy.Request(url, self.parse, meta={
        #     'splash': {
        #         'endpoint': 'render.html',
        #         'args': {'wait': 0.5}
        #     }
        # })
huawei_spider.py 文件源码
python
阅读 18
收藏 0
点赞 0
评论 0
评论列表
文章目录