def __crawllianjie(self, page_sourse):
    """Collect hotel entries from a search-result page into ``self.listPageInfo``.

    ``page_sourse`` is wrapped in an ``HtmlResponse`` and every ``ul`` under
    the ``div`` whose class is exactly ``'searchresult_list '`` is treated as
    one hotel. For each hotel a dict with the keys ``guid``, ``url``,
    ``hotel_name``, ``OTA``, ``comm_num`` and ``address`` is appended to
    ``self.listPageInfo``.

    Args:
        page_sourse: HTML of the listing page, passed as the response body
            and decoded as UTF-8.

    Raises:
        IndexError: if a hotel entry has no link href, no address text or no
            link text (these three fields are taken as the first XPath match).
    """
    # The URL is only a placeholder; the response is built from in-memory HTML.
    response = HtmlResponse(url="my HTML string", body=page_sourse, encoding="utf-8")

    # NOTE(review): the trailing space inside the class values is matched
    # literally by XPath; presumably it mirrors the site's markup - confirm
    # before "tidying" it away.
    hotel_list = response.xpath("//div[@class='searchresult_list ']/ul")

    for hotel in hotel_list:
        url = hotel.xpath("li[@class='searchresult_info_name']/h2/a/@href").extract()[0]
        address = hotel.xpath("li[@class='searchresult_info_name']/p[@class='searchresult_htladdress']/text()").extract()[0]

        # The comment count is optional: keep only the digits of the first
        # matching text node and fall back to 0 when the node is missing or
        # contains no digits at all.
        judgement_texts = hotel.xpath("li[@class='searchresult_info_judge ']/div/a/span[@class='hotel_judgement']/text()").extract()
        digits = re.sub(r'\D', '', judgement_texts[0]) if judgement_texts else ''
        comm_num = int(digits) if digits else 0

        name = hotel.xpath("li[@class='searchresult_info_name']/h2/a/text()").extract()[0]

        self.listPageInfo.append({
            "guid": uuid.uuid1(),
            "url": url,
            "hotel_name": name,
            "OTA": self.__ota_info,
            "comm_num": comm_num,
            "address": address,
        })
评论列表
文章目录