def get_posts_url_from_page(page_url):
    """
    Fetch a listing page and collect the post URLs it links to.

    Written in Tornado generator-coroutine style: it yields on the async
    fetch and delivers its result via ``gen.Return``.
    NOTE(review): presumably decorated with ``@gen.coroutine`` at the
    definition site -- confirm.

    :param page_url: URL of the listing page to download.
    :return: list of ``href`` values, one per post block that has a
        ``div.post-meta > p > a`` link; an empty list when the fetch fails
        with ``httpclient.HTTPError``.
    """
    # Only the network call can raise HTTPError, so keep the try minimal.
    try:
        response = yield httpclient.AsyncHTTPClient().fetch(page_url, headers=headers)
    except httpclient.HTTPError as e:
        print('Exception: %s %s' % (e, page_url))
        raise gen.Return([])

    soup = BeautifulSoup(response.body, 'html.parser')
    urls = []
    for post in soup.find_all('div', class_="post floated-thumb"):
        meta = post.find("div", class_="post-meta")
        # BeautifulSoup returns None for a missing child tag; skip posts that
        # lack the expected structure instead of failing the whole page.
        paragraph = meta.p if meta is not None else None
        link = paragraph.a if paragraph is not None else None
        href = link.get('href') if link is not None else None
        if href is None:
            continue
        urls.append(href)
    raise gen.Return(urls)
评论列表
文章目录