def convert_html_to_telegraph_format(html_string, clean_html=True, output_format="json_string"):
    """Convert an HTML string into Telegraph content.

    Args:
        html_string: HTML markup to convert.
        clean_html: When true, the markup is first passed through
            ``clean_article_html`` and the parsed tree is preprocessed
            (``preprocess_fragments``, ``preprocess_media_tags``,
            ``move_to_top``, ``post_process``). When false, the markup is
            only parsed and the parent of the first fragment is used as-is.
        output_format: One of ``'json_string'``, ``'python_list'`` or
            ``'html_string'``.

    Returns:
        * ``'json_string'``: the converted nodes serialized with
          ``json.dumps(..., ensure_ascii=False)``.
        * ``'python_list'``: the list of converted nodes (empty when nothing
          was parsed).
        * ``'html_string'``: the (possibly preprocessed) body serialized back
          to a unicode HTML string, or ``''`` when nothing was parsed.
        * any other value: ``None``.
    """
    if clean_html:
        html_string = clean_article_html(html_string)
        body = preprocess_fragments(
            _fragments_from_string(html_string)
        )
        if body is not None:
            # Snapshot the descendants before visiting them.
            # NOTE(review): presumably preprocess_media_tags mutates the tree,
            # which would disturb a live iterator -- confirm.
            for tag in list(body.iterdescendants()):
                preprocess_media_tags(tag)
            move_to_top(body)
            post_process(body)
    else:
        fragments = _fragments_from_string(html_string)
        body = fragments[0].getparent() if len(fragments) else None

    content = []
    if body is not None:
        content = [_recursive_convert(child) for child in body.iterchildren()]

    if output_format == 'json_string':
        return json.dumps(content, ensure_ascii=False)
    if output_format == 'python_list':
        return content
    if output_format == 'html_string':
        # html.tostring() cannot serialize None; empty input yields an empty
        # document instead of an exception.
        if body is None:
            return ''
        return html.tostring(body, encoding='unicode')

    # Unrecognised output_format: keep the historical behaviour of returning
    # None rather than raising, so existing callers are unaffected.
    return None
html_to_telegraph.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录