def get_links_from_url(url):
    """Fetch the page at `url` and collect the links found in it.

    Every link has the fragment after `#` stripped and is resolved against
    `url`, so e.g. 'gen.html#tornado.gen.coroutine' becomes
    'http://www.tornadoweb.org/en/stable/gen.html'.

    The result is delivered by raising `gen.Return`: the list of absolute
    links on success, or an empty list when anything in the fetch/parse
    step raises (the error is printed together with `url`).

    NOTE(review): the `yield` / `gen.Return` pattern presumably relies on a
    coroutine decorator applied outside this view -- confirm.
    """
    try:
        # Download the page; the yielded fetch produces the response.
        response = yield httpclient.AsyncHTTPClient().fetch(url)
        print('fetched %s' % url)

        # Use the body as-is when it is already a str, otherwise decode it
        # with the default codec.
        body = response.body
        if isinstance(body, str):
            page = body
        else:
            page = body.decode()

        # Strip each link's fragment, then make it absolute relative to
        # the page it was found on.
        links = []
        for href in get_links(page):
            links.append(urljoin(url, remove_fragment(href)))
    except Exception as err:
        print('Exception: %s %s' % (err, url))
        # gen.Return is the special exception used to return a value from
        # a coroutine; a failed page yields no links.
        raise gen.Return([])
    else:
        # Raised outside the `try` body so the handler above cannot catch
        # it; its value argument becomes the coroutine's result.
        raise gen.Return(links)
评论列表
文章目录