def parse_item(self, response):
    """Build one ``AppstoreItem`` from an app detail page and yield it.

    Fields populated on the item:

    * ``title``       -- text of the ``span.title`` node inside the app-info
      list, UTF-8 encoded (empty if the node is missing).
    * ``url``         -- ``response.url`` unchanged.
    * ``appid``       -- the part of ``response.url`` after its last ``/``.
    * ``intro``       -- ``content`` of the ``<meta name="description">`` tag,
      UTF-8 encoded (empty if the tag is missing).
    * ``recommended`` -- concatenation of ``"<appid>:<name>,"`` for every
      ``div.open-info`` block that has a link, UTF-8 encoded.  Each entry,
      including the last one, is followed by a comma.

    :param response: the downloaded detail page.
    :returns: a generator yielding a single ``AppstoreItem``.
    """
    page = Selector(response)
    item = AppstoreItem()

    # extract_first() returns None when the XPath matches nothing; supplying a
    # text default keeps the following .encode() from raising AttributeError.
    title = page.xpath(
        '//ul[@class="app-info-ul nofloat"]/li/p/span[@class="title"]/text()'
    ).extract_first(default=u'')
    item['title'] = title.encode('utf-8')

    item['url'] = response.url
    # Take everything after the last slash.  For plain http:// URLs this is
    # the same value the previous scheme-specific regex captured, but it also
    # works for https:// URLs instead of failing on a non-match.
    item['appid'] = response.url.rsplit('/', 1)[-1]

    intro = page.xpath(
        '//meta[@name="description"]/@content'
    ).extract_first(default=u'')
    item['intro'] = intro.encode('utf-8')

    entries = []
    for div in page.xpath('//div[@class="open-info"]'):
        url = div.xpath('./p[@class="name"]/a/@href').extract_first()
        if not url:
            # No link means there is no app id to record for this block.
            continue
        name = div.xpath('./p[@class="name"]/a/text()').extract_first(default=u'')
        recommended_appid = url.rsplit('/', 1)[-1]
        # Format as text and encode once after joining: formatting an
        # already-encoded name would embed its bytes repr on Python 3.
        entries.append(u"{0}:{1},".format(recommended_appid, name))
    item['recommended'] = u"".join(entries).encode('utf-8')

    yield item
huawei_spider.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录