def parse_item(self, response):
item = CrawlmeizituItem()
selector = scrapy.Selector(response)
image_title = selector.xpath('//h2/a/text()').extract()
image_url = selector.xpath('//h2/a/@href').extract()
image_tags = selector.xpath('//div[@class="metaRight"]/p/text()').extract()
if selector.xpath('//*[@id="picture"]/p/img/@src').extract():
image_src = selector.xpath('//*[@id="picture"]/p/img/@src').extract()
else:
image_src = selector.xpath('//*[@id="maincontent"]/div/p/img/@src').extract()
if selector.xpath('//*[@id="picture"]/p/img/@alt').extract():
pic_name = selector.xpath('//*[@id="picture"]/p/img/@alt').extract()
else:
pic_name = selector.xpath('//*[@id="maincontent"]/div/p/img/@alt').extract()
#//*[@id="maincontent"]/div/p/img/@alt
item['title'] = image_title
item['url'] = image_url
item['tags'] = image_tags
item['src'] = image_src
item['alt'] = pic_name
print(item)
time.sleep(1)
yield item
评论列表
文章目录