def preprocess_media_tags(element):
    """Clean up list whitespace and rewrite media embeds on one element, in place.

    Anything that is not an ``html.HtmlElement`` is ignored. Depending on the
    element's tag:

    * ``ol`` / ``ul``: the element's leading text is cleared.
    * ``li``: the element's tail text is cleared.
    * ``iframe``: if ``src`` matches ``youtube_re`` or ``vimeo_re``, the
      iframe text is cleared, ``src`` is rewritten to a
      ``/embed/youtube?url=...`` or ``/embed/vimeo?url=...`` path, and the
      iframe is passed to ``_wrap_figure`` unless it already has a
      ``figure`` ancestor. Any other iframe, including one without a ``src``
      attribute, gets ``drop_tag()`` called on it.
    * ``blockquote`` whose ``class`` is exactly ``twitter-tweet``: for the
      first contained link whose ``href`` matches ``twitter_re``, a new
      ``iframe`` pointing at ``/embed/twitter?url=...`` is inserted before
      the blockquote and passed to ``_wrap_figure``, then the blockquote is
      removed with ``drop_tree()``. A blockquote without such a link is
      left untouched.

    :param element: node to process; it is modified in place.
    :return: ``None``.
    """
    if not isinstance(element, html.HtmlElement):
        return

    if element.tag in ['ol', 'ul']:
        # ignore any spaces between <ul> and <li>
        element.text = ''
    elif element.tag == 'li':
        # ignore spaces after </li>
        element.tail = ''
    elif element.tag == 'iframe':
        # An iframe without a src attribute yields None, which re.match()
        # rejects with a TypeError. Treat it as an empty URL so it is handled
        # like any other unsupported iframe.
        iframe_src = element.get('src') or ''
        youtube = re.match(youtube_re, iframe_src)
        vimeo = re.match(vimeo_re, iframe_src)
        if youtube or vimeo:
            element.text = ''  # ignore any legacy text
            if youtube:
                # The video id is whatever follows /embed/ in the URL path.
                yt_id = urlparse(iframe_src).path.replace('/embed/', '')
                element.set(
                    'src',
                    '/embed/youtube?url=' + quote_plus('https://www.youtube.com/watch?v=' + yt_id),
                )
            elif vimeo:
                element.set(
                    'src',
                    '/embed/vimeo?url=' + quote_plus('https://vimeo.com/' + vimeo.group(2)),
                )
            # Do not wrap again when the iframe already sits inside a <figure>.
            if not element.xpath('./ancestor::figure'):
                _wrap_figure(element)
        else:
            element.drop_tag()
    elif element.tag == 'blockquote' and element.get('class') == 'twitter-tweet':
        # The XPath only selects anchors that carry an href, so get('href')
        # below never returns None.
        for tw_link in element.xpath('.//a[@href]'):
            tweet_url = tw_link.get('href')
            if twitter_re.match(tweet_url):
                twitter_frame = html.HtmlElement()
                twitter_frame.tag = 'iframe'
                twitter_frame.set('src', '/embed/twitter?url=' + quote_plus(tweet_url))
                element.addprevious(twitter_frame)
                _wrap_figure(twitter_frame)
                element.drop_tree()
                # The blockquote is no longer in the tree; stop here so a
                # second matching link does not try to insert next to, and
                # drop, an already-detached element.
                break
html_to_telegraph.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录