def __parseUrls(self, page_source):
    """Collect hotel entries from a listing page into ``self.listPageInfo``.

    Four parallel columns are pulled out of the markup with XPath: link
    hrefs, comment counts, hotel names and addresses. They are matched up
    by position and one dict per hotel is appended to
    ``self.listPageInfo``. If the four columns do not all have the same
    length, nothing is appended.

    Args:
        page_source: HTML of the listing page; used as the response body
            with ``encoding="utf-8"``.

    Raises:
        ValueError: propagated from ``int()`` when a comment-count text is
            not a valid integer. Records appended before the failing one
            remain in ``self.listPageInfo``.
    """
    # The url argument is not a real address; the response object is built
    # only so that XPath queries can be run against the supplied markup.
    page = HtmlResponse(url="my HTML string", body=page_source, encoding="utf-8")

    links = page.xpath("//a[@class='name']/@href").extract()
    comment_counts = page.xpath("//div[@class='comment']/a/span/text()").extract()
    names = page.xpath("//a[@class='name']/text()").extract()
    addresses = page.xpath("//span[@class='address']/text()").extract()

    # Entries are paired purely by index, so columns of unequal length
    # cannot be aligned; in that case the page contributes no records.
    if not (len(links) == len(comment_counts) == len(names) == len(addresses)):
        return

    for link, count_text, name, address in zip(
        links, comment_counts, names, addresses
    ):
        self.listPageInfo.append({
            "guid": uuid.uuid1(),
            "url": link,
            "hotel_name": name,
            # NOTE(review): this literal looks like mis-encoded text;
            # confirm the intended OTA name against the original file.
            "OTA": "??",
            "comm_num": int(count_text),
            "address": address,
        })
评论列表
文章目录