def _fragments_from_string(html_string):
    """Parse ``html_string`` into a list of top-level fragments.

    If the parsed result starts with a text node instead of an element, that
    text is wrapped in a ``<p>`` element so the returned list begins with an
    element. Processing instructions are then dropped.

    :param html_string: HTML markup to parse.
    :return: list of fragments, or ``[]`` when parsing yields nothing.
    """
    # Imported locally under its own name: ``html`` in this function refers to
    # the HTML parsing module that provides ``fragments_fromstring``.
    from xml.sax.saxutils import escape

    fragments = html.fragments_fromstring(html_string)
    if not fragments:
        return []

    # convert and append text node before starting tag
    leading = fragments[0]
    if not isinstance(leading, html.HtmlElement):
        if leading.strip():
            if len(fragments) == 1:
                # The text node is already entity-decoded. Escape it before
                # re-parsing so that literal "<", ">" and "&" stay text rather
                # than turning into markup, matching the ``paragraph.text``
                # assignment used in the multi-fragment branch below.
                return html.fragments_fromstring('<p>%s</p>' % escape(leading))
            paragraph = _create_element('p')
            paragraph.text = leading
            # Place the new paragraph in the tree right before the first
            # element, and mirror that position in the returned list.
            fragments[1].addprevious(paragraph)
            fragments.insert(1, paragraph)
        # Drop the raw text node; it has been replaced by the paragraph
        # (or was whitespace only).
        fragments.pop(0)
        if not fragments:
            return []

    # remove xml instructions (if cleaning is disabled)
    # The XPath is absolute ("//"), so it is evaluated against the whole tree
    # that contains the first fragment, not only its descendants.
    for instruction in fragments[0].xpath('//processing-instruction()'):
        instruction.drop_tag()
    return fragments
html_to_telegraph.py 文件源码
python
阅读 18
收藏 0
点赞 0
评论 0
评论列表
文章目录