def walkNextPages(sess, url="https://iptime.com/iptime/?page_id=126&dffid=1&dfsid=11"):
    """Walk a paginated listing, processing each page and following "next".

    For every page, ``walkListItems(sess, url)`` is called first. The page is
    then fetched with ``sess.get`` and parsed, and its ``<ul class="pages">``
    pager is searched for an image whose file name is ``next_1.gif``. If the
    image is present, the link next to it becomes the new ``url``; if not,
    the walk ends.

    Args:
        sess: Session-like object. Here only ``sess.get(url=...)`` is used,
            and its result must expose ``.text``; the object is also passed
            through to ``walkListItems``.
        url: Address of the first listing page. The current page number is
            read from its ``pageid`` query parameter (1 when absent).

    Returns:
        None. Any ``Exception`` raised while walking is reported (traceback
        plus message) and ends the walk without being re-raised.
        ``KeyboardInterrupt`` and ``SystemExit`` are allowed to propagate so
        that the crawl can be interrupted.
    """
    from os.path import basename
    from urllib.parse import parse_qsl, urlsplit

    def get_pageid(page_url):
        """Return the integer ``pageid`` query parameter of *page_url* (default 1)."""
        query = dict(parse_qsl(urlsplit(page_url).query))
        return int(query.get("pageid", "1"))

    try:
        while True:
            pageid = get_pageid(url)
            print("pageid=%d" % pageid)
            walkListItems(sess, url)

            root = html.fromstring(sess.get(url=url).text)
            # Find the "next page" arrow in a single pass over the pager
            # images; None means this is the last page.
            next_img = next(
                (img for img in root.xpath(".//ul[@class='pages']//img")
                 if basename(img.get("src", "")) == "next_1.gif"),
                None,
            )
            if next_img is None:
                break

            # The link is looked up relative to the image's grandparent
            # element; a missing link raises IndexError, reported below.
            url = urljoin(url, next_img.xpath("../../a/@href")[0])

            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, and a pager that does not advance would otherwise
            # make this loop spin forever.
            nextpageid = get_pageid(url)
            if nextpageid != pageid + 1:
                raise RuntimeError(
                    "unexpected next pageid %d after pageid %d"
                    % (nextpageid, pageid)
                )
    except Exception as ex:
        # Best-effort crawl: report the failure and return rather than crash.
        traceback.print_exc()
        print(ex)
评论列表
文章目录