def parse(self, response):
url = response.url
log.msg('[url]%s' % url)
body = response.body
soup = BeautifulSoup(body, 'lxml').select('.cardetail-infor')[0]
text = str(self.gettextonly(soup)).decode('utf-8')
m = re.findall(ur'(????|????|?????|????|????|? ? ?|? ? ?|????|??????)?\n?(.+)\n', text, re.M | re.U)
map = dict([(d[0], d[1]) for d in m])
result = SpecItem()
result['id'] = url.split('/')[-1]
result['spec'] = map
yield result
评论列表
文章目录