def parse(self, response):
    """Schedule a detail-page request for every tutorial speaker on the page.

    Each ``td`` cell whose class is ``slot slot-tutorial`` is inspected on
    its own; the speaker names, title links and title texts found inside
    that cell are paired up positionally.

    Args:
        response: The downloaded page to scan for tutorial slots.

    Yields:
        Request: One request per (speaker, link, title) triple. It is
        handled by ``self.parse_details`` and carries ``speaker``
        (whitespace-stripped), ``url`` (the raw ``href`` value) and
        ``talk`` (the link text) in ``meta``.
    """
    hxs = scrapy.Selector(response)
    for slot in hxs.xpath('//td[@class="slot slot-tutorial"]'):
        # The leading "." keeps each query inside the current cell. A bare
        # "//" restarts from the document root, so every cell would return
        # every speaker on the page and the same requests would be emitted
        # once per cell.
        speakers = slot.xpath('.//span[@class="speaker"]/text()').extract()
        urls = slot.xpath('.//span[@class="title"]//@href').extract()
        talks = slot.xpath('.//span[@class="title"]//a/text()').extract()

        # zip() stops at the shortest list, so a cell that lacks a link or
        # a title produces fewer requests instead of raising IndexError.
        for speaker, url, talk in zip(speakers, urls, talks):
            yield Request(
                # NOTE(review): assumes the href is a site-relative path that
                # can simply be appended to the host - confirm on the page.
                url='http://www.pydata.org' + url,
                callback=self.parse_details,
                meta={'speaker': speaker.strip(), 'url': url, 'talk': talk},
            )
pydataSpiderDetails.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录