def get_html(filename):
    """Return the html2text rendering of ``pdf/<filename>.html``.

    The file is opened as UTF-8 text, read in full, and its contents are
    passed to ``html2text.HTML2Text().handle``; whatever that call returns
    is returned to the caller.

    Args:
        filename: Base name of the file, without the ``pdf/`` directory
            prefix or the ``.html`` extension (both are added here).

    Returns:
        The result of ``HTML2Text.handle`` for the file's contents.
    """
    converter = html2text.HTML2Text()
    source_path = 'pdf/' + filename + '.html'
    with io.open(source_path, 'r', encoding='utf-8') as html_file:
        # Convert while the file is still open, as the original did.
        return converter.handle(html_file.read())