def html2text(html, colors=True):
    """Render an HTML string as plain text suitable for a terminal.

    The markup is parsed with BeautifulSoup's ``html.parser`` and rewritten
    in place before the text is extracted:

    * ``h1``-``h3`` headings are prefixed with a newline and, when *colors*
      is true, wrapped in the ANSI escape ``\\033[93m`` ... ``\\033[0m``.
    * ``b`` / ``strong`` text is wrapped in ``\\033[92m`` ... ``\\033[0m``,
      only when *colors* is true.
    * ``hr`` becomes a paragraph of dashes as wide as the terminal, or
      ``-----`` when the terminal size cannot be queried.
    * ``img`` becomes ``[IMAGE "alt text"]`` (or ``[IMAGE]`` when the tag
      has no ``alt`` attribute), followed by a newline.

    Args:
        html: The HTML markup to convert.
        colors: Emit ANSI colour escapes for headings and bold text.

    Returns:
        The text content of the rewritten document.
    """
    soup = BeautifulSoup(html, "html.parser")
    reset = '\033[0m'

    # Headings always get the leading newline; only the colour codes are
    # optional.
    if colors:
        heading_start, heading_end = '\033[93m', reset
    else:
        heading_start = heading_end = ""
    for level in ('h1', 'h2', 'h3'):
        for heading in soup.find_all(level):
            # get_text() instead of .string: .string is None for a tag that
            # is empty or has more than one child, which would otherwise put
            # the literal text "None" into the output.
            heading.string = "\n{0}{1}{2}".format(
                heading_start, heading.get_text(), heading_end)

    if colors:
        bold_start = '\033[92m'
        for tag_name in ('b', 'strong'):
            for bold in soup.find_all(tag_name):
                bold.string = "{0}{1}{2}".format(
                    bold_start, bold.get_text(), reset)

    # Replace hr with a visual line spanning the terminal width.
    try:
        rule = "-" * os.get_terminal_size().columns
    except OSError:
        # No terminal to query, e.g. output piped to a file or process.
        rule = "-----"
    for hr in soup.find_all('hr'):
        line = soup.new_tag('p')
        line.string = rule
        hr.replace_with(line)

    # Replace images with an informational placeholder.
    for img in soup.find_all('img'):
        alt = img.get('alt')
        # An empty alt attribute is still shown (as ""); only a missing
        # attribute drops the quoted part.
        label = "" if alt is None else " \"{0}\"".format(alt)
        placeholder = soup.new_tag('p')
        placeholder.string = "[IMAGE{0}]\n".format(label)
        img.replace_with(placeholder)

    return soup.get_text()
评论列表
文章目录