def get_site_text(url, timeout=30):
    """Download a web page and return its content converted from HTML to text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. The value
            is handed unchanged to ``requests.get``; pass ``None`` to wait
            indefinitely.

    Returns:
        The result of ``html2text.html2text`` applied to the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Without an explicit timeout, requests waits forever on a stalled server.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return html2text.html2text(resp.text)


# 2: Score each word for an individual page against the full set of pages