DriveServices.py 文件源码-python代码片段

DriveServices.py 文件源码

python

阅读 18 收藏 0 点赞 0 评论 0

项目：ugc.aggregator 作者: Dreamcatcher-GIS 项目源码文件源码

def __crawllianjie(self, page_sourse):
        """Parse a hotel search-result page and record every hotel it lists.

        The HTML in ``page_sourse`` is wrapped in an ``HtmlResponse`` and each
        ``<ul>`` under ``div[@class='searchresult_list ']`` is treated as one
        hotel. For every hotel a dict with the keys ``guid``, ``url``,
        ``hotel_name``, ``OTA``, ``comm_num`` and ``address`` is appended to
        ``self.listPageInfo``.

        Missing nodes no longer abort the whole page with ``IndexError``:

        * an entry without a link or a name is skipped,
        * a missing address is stored as an empty string,
        * a missing or digit-free comment count is stored as ``0``.

        :param page_sourse: HTML source of the result page, parsed as UTF-8.
        :return: ``None``; results accumulate in ``self.listPageInfo``.
        """
        response = HtmlResponse(url="my HTML string", body=page_sourse, encoding="utf-8")
        # NOTE: the XPath predicates compare the complete ``class`` attribute,
        # so the trailing space inside 'searchresult_list ' and
        # 'searchresult_info_judge ' is part of the value being matched.
        # Do not trim it.
        hotel_list = response.xpath("//div[@class='searchresult_list ']/ul")
        for hotel in hotel_list:
            urls = hotel.xpath("li[@class='searchresult_info_name']/h2/a/@href").extract()
            names = hotel.xpath("li[@class='searchresult_info_name']/h2/a/text()").extract()
            if not urls or not names:
                # Without a link and a name the entry cannot be identified;
                # skip it rather than crash and lose the rest of the page.
                continue

            addresses = hotel.xpath("li[@class='searchresult_info_name']/p[@class='searchresult_htladdress']/text()").extract()
            address = addresses[0] if addresses else ""

            judgements = hotel.xpath("li[@class='searchresult_info_judge ']/div/a/span[@class='hotel_judgement']/text()").extract()
            # Keep only the digits of the comment-count label; raw string so
            # that ``\D`` is a regex class, not an invalid string escape.
            digits = re.sub(r'\D', '', judgements[0]) if judgements else ''
            comm_num = int(digits) if digits else 0

            self.listPageInfo.append({
                "guid": uuid.uuid1(),
                "url": urls[0],
                "hotel_name": names[0],
                "OTA": self.__ota_info,
                "comm_num": comm_num,
                "address": address,
            })