def alternative_parse_method(self, response):
    """Scrape quotes out of the page's inline JavaScript using js2xml.

    An alternative to parsing the JS data by hand: the ``<script>`` that
    contains ``var data =`` is converted to an XML tree with
    ``js2xml.parse``, wrapped in a Scrapy selector, and queried with CSS
    selectors.

    Args:
        response: The response object for the page being parsed; it must
            provide ``xpath()``, ``css()``, ``urljoin()`` and ``url``.

    Yields:
        One dict per quote with the keys ``'text'``, ``'author'`` and
        ``'tags'``, followed by a ``scrapy.Request`` for the next page when
        an ``li.next`` link is present.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import section, which is not visible here.
    import logging

    script = response.xpath('//script[contains(., "var data =")]/text()').extract_first()
    if script is None:
        # extract_first() yields None when no matching <script> exists.
        # Feeding that to js2xml.parse() would raise and abort the callback
        # before pagination is handled, so skip item extraction instead.
        logging.getLogger(__name__).warning(
            'No inline script containing "var data =" found on %s',
            response.url,
        )
    else:
        sel = scrapy.Selector(root=js2xml.parse(script))
        for quote in sel.css('var[name="data"] > array > object'):
            yield {
                'text': quote.css('property[name="text"] > string::text').extract_first(),
                'author': quote.css('property[name="author"] property[name="name"] > string::text').extract_first(),
                'tags': quote.css('property[name="tags"] string::text').extract(),
            }

    link_next = response.css('li.next a::attr("href")').extract_first()
    if link_next:
        # NOTE(review): no callback is passed, so Scrapy will hand the next
        # page to the spider's default callback rather than to this method
        # -- confirm that this is intended.
        yield scrapy.Request(response.urljoin(link_next))
spider_7_quotes_js2xml.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录