def content_parse(self, response):
    """Extract the lemma summary and schedule the image-search request.

    Reads the item under construction from ``response.meta['item']`` and
    stores the tag-stripped, UTF-8 encoded text of the first summary
    paragraph in ``item['description']`` (or the placeholder
    ``'description_null'`` when no such paragraph is found). It then yields
    a request for the Baidu image search page of ``item['title']``.

    Args:
        response: Response whose ``meta`` carries the ``'item'`` entry.

    Yields:
        scrapy.Request: Request for the image search page. Its ``meta``
        carries the item plus the ``'splash'`` settings
        (``render.html`` endpoint, 0.5 s wait); the callback is
        ``self.picture_parse``.
    """
    # Function-scope import keeps the block self-contained on both
    # Python 2 (urllib.quote) and Python 3 (urllib.parse.quote).
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    log.msg('run into content_parse at line 40', level=log.INFO)
    item = response.meta['item']

    # Paragraphs of the summary box inside the main content area.
    result = response.xpath(
        '//div[@class="main-content"]/div[@class="lemma-summary"]'
        '/div[@class="para"]').extract()
    if result:
        # Drop every HTML tag, keeping only the text of the first paragraph.
        pattern = re.compile(r'<[^>]+>', re.S)
        description = pattern.sub('', result[0]).encode('utf-8')
    else:
        description = 'description_null'
    item['description'] = description

    # Percent-encode the title so reserved characters (&, #, +, %, spaces)
    # cannot truncate or corrupt the ``word`` query parameter. quote() is
    # fed UTF-8 bytes, matching the ``ie=utf-8`` hint in the URL.
    title = item['title']
    if not isinstance(title, bytes):
        title = title.encode('utf-8')
    picture_url = (
        'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8'
        '&word=%s&ic=0&width=0&height=0' % quote(title, safe='')
    )
    log.msg('picture_url: ' + picture_url, level=log.INFO)
    log.msg('run out content_parse at line 51', level=log.INFO)

    yield scrapy.Request(
        picture_url,
        meta={
            'item': item,
            'splash': {
                'endpoint': 'render.html',
                'args': {'wait': 0.5}
            }
        },
        callback=self.picture_parse)
评论列表
文章目录