def _first_xpath_match(selector, query):
    """Return the first string extracted by ``query`` on ``selector``.

    :param selector: object exposing ``.xpath(query).extract()``
    :param query: XPath expression to evaluate
    :return: the first extracted string
    :raises IndexError: if the query matches nothing; the message names the
        failing query so a page-layout change is easy to diagnose
    """
    matches = selector.xpath(query).extract()
    if not matches:
        raise IndexError('no match for xpath: %s' % query)
    return matches[0]


def goodsDetail(detail_url):
    """Scrape a goods detail page with XPath and return its fields.

    The page markup is obtained from ``getHtmlFromJs(detail_url)['content']``
    and parsed through an ``HtmlResponse``.

    :param detail_url: URL of the goods detail page
    :return: dict-like mapping with the keys ``source_url``, ``name``,
        ``price``, ``type``, ``detail``, ``pics`` ('|'-joined image URLs),
        ``storage`` and ``lack_period`` (both empty strings), and
        ``created`` / ``updated`` (integer Unix timestamps)
    :raises IndexError: if the name, price, type or detail XPath matches
        nothing on the page
    """
    # A defaultdict without a factory behaves like a plain dict; the type is
    # kept so callers receive the same object type as before.
    goods_data = defaultdict()
    goods_data['source_url'] = detail_url

    # Hand the response real bytes plus the encoding they were produced with.
    # Wrapping the bytes in str() yields a "b'...'" repr on Python 3, and
    # omitting the encoding lets a charset declared inside the page override
    # the UTF-8 we just encoded to.
    # NOTE(review): assumes 'content' is a text (unicode) string - confirm.
    content = getHtmlFromJs(detail_url)['content']
    html = HtmlResponse(
        url=detail_url,
        body=content.encode('utf-8'),
        encoding='utf-8',
    )

    # The absolute XPaths below are tied to the current page layout.
    goods_data['name'] = _first_xpath_match(
        html, '/html/body/div[7]/div[2]/h1/text()')
    goods_data['price'] = _first_xpath_match(
        html, '/html/body/div[7]/div[2]/div[2]/ul/li[1]/label[1]/text()')
    goods_data['type'] = _first_xpath_match(
        html, '/html/body/div[7]/div[2]/div[2]/ul/li[3]/label/text()')
    # The detail field keeps the whole <table> element as markup, not text.
    goods_data['detail'] = _first_xpath_match(
        html, '/html/body/div[9]/div[2]/div[2]/table')

    # Collect every image URL in the list; <img> elements without a src
    # attribute contribute nothing.
    pic_urls = html.xpath(
        '/html/body/div[7]/div[1]/div[2]/div[2]/ul/li/img/@src').extract()
    # Strip the '!240240' marker from each URL (presumably a thumbnail-size
    # suffix, so the remaining URL points at the full image - TODO confirm).
    goods_data['pics'] = '|'.join(
        url.replace('!240240', '') for url in pic_urls)

    goods_data['storage'] = ''
    goods_data['lack_period'] = ''

    # Take one timestamp so created and updated can never disagree.
    now = int(time.time())
    goods_data['created'] = now
    goods_data['updated'] = now
    return goods_data
评论列表
文章目录