def parse(self, response):
    """Yield one ``GeojsonPointItem`` per store marker embedded in the page.

    The page source is searched for a ``markerData...}`` fragment, which is
    turned back into a JSON object. Each marker's ``info`` field holds an HTML
    snippet that is parsed for name, address, phone, link and opening hours.

    Args:
        response: The text response for the store-locator page.

    Yields:
        GeojsonPointItem: One item per marker. ``city``, ``state``,
        ``opening_hours`` and ``ref`` are ``None`` when the corresponding
        piece of markup is missing, so that one incomplete marker does not
        abort the crawl of the remaining ones.
    """
    # Greedy match: runs from "markerData" to the last "}" on the same line
    # ("." does not match newlines).
    marker_txt = re.findall(r"markerData.*\}", response.text)
    if not marker_txt:
        return

    # The pattern starts after the opening `{"` of the object, so put it back
    # to obtain a parseable JSON document, then take its first value.
    markers = list(json.loads('{"' + marker_txt[0]).values())[0]
    if not markers:
        return

    for marker in markers:
        info = marker["info"]
        # Wrap the HTML snippet in a response object so CSS/XPath selectors
        # can be used on it.
        marker_response = HtmlResponse(url="", body=info.encode("utf-8"))

        # Opening hours are embedded in the snippet as a JSON object that
        # begins with a "label" key; not every marker is guaranteed to have it.
        hours_match = re.search(r"\{\"label.*\}", info)
        opening_hours = None
        if hours_match:
            parsed_hours = json.loads(hours_match.group(0))
            opening_hours = get_hours(parsed_hours["days"])

        addr_parts = marker_response.css(
            ".address span:not(.phone)::text").extract()
        # The last address span is expected to read "City, State". Split on
        # the final comma only, and tolerate a missing span or comma.
        locality = addr_parts[-1] if addr_parts else ""
        if "," in locality:
            city, state = (part.strip() for part in locality.rsplit(",", 1))
        else:
            city, state = (locality.strip() or None), None

        url = marker_response.css("header a").xpath("@href").extract_first()
        # The reference is the last path segment of the link, minus any
        # file extension.
        ref = url.split("/")[-1].split(".")[0] if url else None

        yield GeojsonPointItem(
            lat=marker.get("lat"),
            lon=marker.get("lng"),
            name=marker_response.css("header a::text").extract_first(),
            addr_full=", ".join(addr_parts),
            city=city,
            state=state,
            country="United States",
            phone=marker_response.css(".phone::text").extract_first(),
            website=url,
            opening_hours=opening_hours,
            ref=ref,
        )
# Comment list
# Table of contents