def __parseUrls(self, page_source):
    """Parse a hotel list page and append one record per hotel to ``self.listPageInfo``.

    The markup is wrapped in an ``HtmlResponse`` so it can be queried with
    XPath. Every ``div.h_item`` under ``div.h_list`` is treated as one hotel.

    Args:
        page_source: HTML markup of the list page; passed as the response
            body and interpreted as UTF-8.

    Returns:
        None. Results are appended to ``self.listPageInfo`` as dicts with the
        keys ``guid``, ``url``, ``hotel_name``, ``OTA``, ``comm_num`` and
        ``address``.
    """
    import logging

    # The url is only a placeholder: the response is built from a string
    # that is already in memory, nothing is fetched.
    response = HtmlResponse(url="My HTML String", body=page_source, encoding="utf-8")
    hotel_list = response.xpath("//div[@class='h_list']/div[@class='h_item']")
    for hotel in hotel_list:
        urls = hotel.xpath(".//p[@class='h_info_b1']/a/@href").extract()
        names = hotel.xpath(".//p[@class='h_info_b1']/a/@title").extract()
        address_parts = hotel.xpath(".//p[@class='h_info_b2']/text()").extract()

        # The address is the SECOND text node of the paragraph, so at least
        # two nodes are required. A listing lacking any of these fields used
        # to raise IndexError and abort the whole page; skip just that item.
        if not urls or not names or len(address_parts) < 2:
            logging.getLogger(__name__).warning(
                "Skipping hotel item with missing url, name or address"
            )
            continue

        comment_counts = hotel.xpath(
            ".//div[@class='h_info_comt']/a/span[@class='c555 block mt5']/b/text()"
        ).extract()
        # NOTE: the fallback is the int 0 while a scraped count stays a
        # string; kept unchanged so stored values match earlier runs.
        commnum = comment_counts[0] if comment_counts else 0

        self.listPageInfo.append({
            "guid": uuid.uuid1(),
            "url": urls[0],
            "hotel_name": names[0],
            "OTA": self.__ota_info,
            "comm_num": commnum,
            "address": address_parts[1],
        })
评论列表
文章目录