def next_link(cur, done):
    """Return the title of the next article to follow from page ``cur``.

    The page HTML is fetched with ``wikipedia.page(cur).html()``. If ``cur``
    is a disambiguation page, the first option that is not already in
    ``done`` is fetched instead. Parenthesised text is removed, then the
    ``<a>`` tags inside ``<p>`` elements are scanned in document order.

    A link is accepted when its ``href`` starts with ``/wiki/``, it has a
    ``title`` that is not in ``done``, and its first child is a text node
    that is entirely lowercase.

    Args:
        cur: Title of the page to fetch.
        done: Container of titles that must not be returned (only
            membership tests with ``in`` are performed on it).

    Returns:
        The ``title`` attribute of the first accepted link, or ``None`` when
        no link qualifies or when every disambiguation option is already
        in ``done``.
    """
    try:
        html = wikipedia.page(cur).html()
    except wikipedia.exceptions.DisambiguationError as e:
        # Fall back to the first option we have not visited yet. Without a
        # default here, an exhausted option list would leave the HTML unbound.
        option = next((op for op in e.options if op not in done), None)
        if option is None:
            return None
        html = wikipedia.page(option).html()

    # Drop "(...)" spans so links inside parentheses are never considered.
    # NOTE: the substitution runs on the raw markup, so parentheses inside
    # attribute values (e.g. an href or title containing "(...)") are
    # removed as well.
    soup = BeautifulSoup(re.sub(r'\([^)]*\)', '', html), "html.parser")

    for para in soup.find_all("p"):
        for link in para.find_all("a"):
            href = link.get("href")
            if not href or not href.startswith("/wiki/"):
                # Anchors without an href, external links, footnotes, etc.
                continue

            title = link.get("title")
            if title is None or title in done:
                # A missing title would be indistinguishable from the
                # "nothing found" result, so skip it rather than return it.
                continue

            # Only plain-text link labels can be tested for case; empty
            # anchors and anchors whose first child is a tag are skipped.
            label = link.contents[0] if link.contents else None
            if isinstance(label, str) and label.islower():
                return title

    return None
评论列表
文章目录