def scrape(site_address, *, timeout=None):
    """Fetch a web page and return its text content as a single string.

    Args:
        site_address: URL of the page to download, as a string.
        timeout: Optional timeout in seconds, forwarded to ``requests.get``.
            The default ``None`` waits indefinitely, as the original code did.

    Returns:
        The text of the page with the markup removed, as one string.

    Raises:
        requests.exceptions.RequestException: if the request fails
            (including a timeout when ``timeout`` is set).
    """
    # Download the page; ``response.content`` is the raw (bytes) HTML body.
    response = requests.get(site_address, timeout=timeout)
    # Strip <script> tags and their contents before parsing.
    # NOTE(review): presumably lxml.html.clean.clean_html -- confirm the import.
    cleaned = clean_html(response.content)
    # Parse the cleaned markup into a document tree (presumably lxml.html).
    document = html.document_fromstring(cleaned)
    # text_content() is what actually drops the remaining tags.
    return document.text_content()
评论列表
文章目录