def process_text_links(text):
    """Process links in text, adding some attributes and linkifying textual links.

    Internal links (hrefs beginning with "/") are left untouched; every other
    link is run through the standard nofollow/target_blank callbacks.

    :param text: HTML/plain text to linkify.
    :returns: the linkified text, with ``<code>`` contents skipped.
    """
    link_callbacks = [callbacks.nofollow, callbacks.target_blank]

    def link_attributes(attrs, new=False):
        """Run standard callbacks except for internal links."""
        href_key = (None, "href")
        # href may be absent (e.g. an anchor without one); default to "" so
        # the startswith() call below cannot raise AttributeError on None.
        if attrs.get(href_key, "").startswith("/"):
            return attrs
        # Run the standard callbacks
        for callback in link_callbacks:
            attrs = callback(attrs, new)
        return attrs

    return bleach.linkify(
        text,
        callbacks=[link_attributes],
        parse_email=False,
        skip_tags=["code"],
    )
# Examples of bleach.linkify() usage (scraped sample collection).
def find_urls_in_text(text):
    """Find url's from text.

    Bleach does the heavy lifting here by identifying the links.

    :param text: Text to search links from
    :returns: set of urls
    """
    found = set()

    def link_collector(attrs, new=False):
        # Record the href; returning None means no markup is emitted,
        # which is fine because we discard linkify's output anyway.
        found.add(attrs.get((None, "href")))
        return None

    bleach.linkify(text, callbacks=[link_collector],
                   parse_email=False, skip_tags=["code"])
    return found
def preview_body(target, value, oldvalue, initiator):
    """Change-listener-style hook: render markdown *value* into sanitized,
    linkified HTML and store it on ``target.body_html``."""
    tags = [
        'a', 'abbr', 'acronym', 'b', 'img', 'blockquote', 'code',
        'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2',
        'h3', 'p'
    ]
    # Per-tag attribute whitelist; anything not listed is dropped.
    attrs = {
        '*': ['class'],
        'a': ['href', 'rel'],
        'img': ['src', 'alt'],  # allow inline images (src + alt text)
    }
    cleaned = bleach.clean(markdown(value, output_format='html'),
                           tags=tags, strip=True, attributes=attrs)
    target.body_html = bleach.linkify(cleaned)
# NOTE(review): the original comment here was garbled by a bad encoding
# (mojibake); it appeared to concern JSON serialization — unrecoverable.
def test_email_link_escaping():
    """Quotes and angle brackets in mailto addresses must be escaped."""
    cases = [
        ('''<a href='mailto:"james"@example.com'>'''
         '''"james"@example.com</a>''',
         '"james"@example.com'),
        ('''<a href="mailto:"j'ames"@example.com">'''
         '''"j'ames"@example.com</a>''',
         '"j\'ames"@example.com'),
        ('''<a href='mailto:"ja>mes"@example.com'>'''
         '''"ja>mes"@example.com</a>''',
         '"ja>mes"@example.com'),
    ]

    def _verify(expected, source):
        eq_(expected, linkify(source, parse_email=True))

    for expected, source in cases:
        yield _verify, expected, source
def test_link_query():
in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
'http://xx.com/?test=win</a>',
'<a rel="nofollow" href="http://xx.com/?test=win">'
'http://xx.com/?test=win</a>'),
linkify('http://xx.com/?test=win'))
in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
'xx.com/?test=win</a>',
'<a rel="nofollow" href="http://xx.com/?test=win">'
'xx.com/?test=win</a>'),
linkify('xx.com/?test=win'))
in_(('<a href="http://xx.com?test=win" rel="nofollow">'
'xx.com?test=win</a>',
'<a rel="nofollow" href="http://xx.com?test=win">'
'xx.com?test=win</a>'),
linkify('xx.com?test=win'))
def test_end_of_sentence():
    """example.com. should match."""
    templates = ('<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}',
                 '<a rel="nofollow" href="http://{0!s}">{0!s}</a>{1!s}')

    def verify(url, punct):
        # Either attribute ordering is acceptable.
        in_([t.format(url, punct) for t in templates],
            linkify('{0!s}{1!s}'.format(url, punct)))

    for url, punct in (('example.com', '.'),
                       ('example.com', '...'),
                       ('ex.com/foo', '.'),
                       ('ex.com/foo', '....')):
        yield verify, url, punct
def test_url_utf8():
    """Allow UTF8 characters in URLs themselves."""
    templates = ('<a href="{0!s}" rel="nofollow">{0!s}</a>',
                 '<a rel="nofollow" href="{0!s}">{0!s}</a>')

    def expected(url):
        # Accept either attribute ordering in the rendered anchor.
        return [t.format(url) for t in templates]

    def check(source, want):
        in_(want, linkify(source))

    urls = ('http://éxámplé.com/',
            'http://éxámplé.com/íàñá/',
            'http://éxámplé.com/íàñá/?foo=bar',
            'http://éxámplé.com/íàñá/?fóo=bár')
    for url in urls:
        yield check, url, expected(url)
def test_email_link_escaping():
    """parse_email=True must escape quotes and '>' inside mailto links."""
    def _assert_escaped(rendered, address):
        eq_(rendered, linkify(address, parse_email=True))

    pairs = (
        ('''<a href='mailto:"james"@example.com'>'''
         '''"james"@example.com</a>''',
         '"james"@example.com'),
        ('''<a href="mailto:"j'ames"@example.com">'''
         '''"j'ames"@example.com</a>''',
         '"j\'ames"@example.com'),
        ('''<a href='mailto:"ja>mes"@example.com'>'''
         '''"ja>mes"@example.com</a>''',
         '"ja>mes"@example.com'),
    )
    for rendered, address in pairs:
        yield _assert_escaped, rendered, address
def test_end_of_sentence():
    """example.com. should match."""
    anchor = '<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}'

    def verify(domain, trailer):
        # Trailing punctuation must stay outside the generated anchor.
        eq_(anchor.format(domain, trailer),
            linkify('{0!s}{1!s}'.format(domain, trailer)))

    for domain, trailer in (('example.com', '.'),
                            ('example.com', '...'),
                            ('ex.com/foo', '.'),
                            ('ex.com/foo', '....')):
        yield verify, domain, trailer
def test_ports():
    """URLs can contain port numbers."""
    template = '<a href="{0}" rel="nofollow">{0}</a>{1}'
    cases = (
        # (input, (linkified part, leftover text))
        ('http://foo.com:8000', ('http://foo.com:8000', '')),
        ('http://foo.com:8000/', ('http://foo.com:8000/', '')),
        ('http://bar.com:xkcd', ('http://bar.com', ':xkcd')),
        ('http://foo.com:81/bar', ('http://foo.com:81/bar', '')),
        ('http://foo.com:', ('http://foo.com', ':')),
    )

    def verify(source, parts):
        eq_(template.format(*parts), linkify(source))

    for source, parts in cases:
        yield verify, source, parts
def test_email_link_escaping():
    """Special characters in email addresses are escaped in the mailto href."""
    samples = [
        ('''<a href='mailto:"james"@example.com'>'''
         '''"james"@example.com</a>''',
         '"james"@example.com'),
        ('''<a href="mailto:"j'ames"@example.com">'''
         '''"j'ames"@example.com</a>''',
         '"j\'ames"@example.com'),
        ('''<a href='mailto:"ja>mes"@example.com'>'''
         '''"ja>mes"@example.com</a>''',
         '"ja>mes"@example.com'),
    ]

    def _run(want, raw):
        eq_(want, linkify(raw, parse_email=True))

    for want, raw in samples:
        yield _run, want, raw
def test_ports():
    """URLs can contain port numbers."""
    def verify(source, split):
        # split = (url part that becomes the anchor, trailing leftover text)
        url, leftover = split
        eq_('<a href="{0}" rel="nofollow">{0}</a>{1}'.format(url, leftover),
            linkify(source))

    for source, split in (
            ('http://foo.com:8000', ('http://foo.com:8000', '')),
            ('http://foo.com:8000/', ('http://foo.com:8000/', '')),
            ('http://bar.com:xkcd', ('http://bar.com', ':xkcd')),
            ('http://foo.com:81/bar', ('http://foo.com:81/bar', '')),
            ('http://foo.com:', ('http://foo.com', ':'))):
        yield verify, source, split
def test_url_utf8():
    """Allow UTF8 characters in URLs themselves."""
    def render(url):
        # Both attribute orderings are valid serializations.
        return ['<a href="{0!s}" rel="nofollow">{0!s}</a>'.format(url),
                '<a rel="nofollow" href="{0!s}">{0!s}</a>'.format(url)]

    def verify(source, acceptable):
        in_(acceptable, linkify(source))

    for source in ('http://éxámplé.com/',
                   'http://éxámplé.com/íàñá/',
                   'http://éxámplé.com/íàñá/?foo=bar',
                   'http://éxámplé.com/íàñá/?fóo=bár'):
        yield verify, source, render(source)
def test_email_link_escaping():
    """Escaping of quoted/special characters in linkified email addresses."""
    def _check_case(out_html, addr):
        eq_(out_html, linkify(addr, parse_email=True))

    for out_html, addr in (
            ('''<a href='mailto:"james"@example.com'>'''
             '''"james"@example.com</a>''',
             '"james"@example.com'),
            ('''<a href="mailto:"j'ames"@example.com">'''
             '''"j'ames"@example.com</a>''',
             '"j\'ames"@example.com'),
            ('''<a href='mailto:"ja>mes"@example.com'>'''
             '''"ja>mes"@example.com</a>''',
             '"ja>mes"@example.com')):
        yield _check_case, out_html, addr
def test_end_of_sentence():
    """example.com. should match."""
    def check(site, punctuation):
        # The punctuation must be emitted after the anchor, not inside it.
        expected = ('<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}'
                    .format(site, punctuation))
        eq_(expected, linkify('{0!s}{1!s}'.format(site, punctuation)))

    cases = [('example.com', '.'),
             ('example.com', '...'),
             ('ex.com/foo', '.'),
             ('ex.com/foo', '....')]
    for site, punctuation in cases:
        yield check, site, punctuation
def test_ports():
    """URLs can contain port numbers."""
    cases = [
        ('http://foo.com:8000', ('http://foo.com:8000', '')),
        ('http://foo.com:8000/', ('http://foo.com:8000/', '')),
        ('http://bar.com:xkcd', ('http://bar.com', ':xkcd')),   # non-numeric "port"
        ('http://foo.com:81/bar', ('http://foo.com:81/bar', '')),
        ('http://foo.com:', ('http://foo.com', ':')),            # bare colon
    ]

    def run_case(raw, expected_parts):
        anchor, rest = expected_parts
        eq_('<a href="{0}" rel="nofollow">{0}</a>{1}'.format(anchor, rest),
            linkify(raw))

    for raw, expected_parts in cases:
        yield run_case, raw, expected_parts
def htmlize(text):
    """
    This helper method renders Markdown then uses Bleach to sanitize it as
    well as convert all links to actual links.
    """
    # Sanitize first, render the markdown, then turn bare URLs into anchors.
    sanitized = bleach.clean(text, strip=True)
    rendered = markdown(sanitized)
    return bleach.linkify(rendered)
# Change-listener hooks below render markdown model fields to sanitized HTML.
def on_changed_about_me(target, value, oldvalue, initiaor):
    """Render the changed ``about_me`` markdown *value* into sanitized,
    linkified HTML stored on ``target.about_me_html``.

    Signature matches a change-listener hook (target/value/oldvalue/initiator);
    ``initiaor`` is a pre-existing typo kept for call compatibility.
    """
    # Fixed: 'blockquote' was misspelled 'blockquate', so <blockquote>
    # elements were always stripped from the rendered HTML.
    allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i',
                    'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2', 'h3', 'p']
    target.about_me_html = bleach.linkify(
        bleach.clean(markdown(value, output_format='html'),
                     tags=allowed_tags, strip=True))
def on_changed_summary(target, value, oldvalue, initiaor):
    """Render the changed ``summary`` markdown *value* into sanitized,
    linkified HTML stored on ``target.summary_html``.

    Signature matches a change-listener hook (target/value/oldvalue/initiator);
    ``initiaor`` is a pre-existing typo kept for call compatibility.
    """
    # Fixed: 'blockquote' was misspelled 'blockquate', so <blockquote>
    # elements were always stripped from the rendered HTML.
    allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i',
                    'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2', 'h3', 'p']
    target.summary_html = bleach.linkify(
        bleach.clean(markdown(value, output_format='html'),
                     tags=allowed_tags, strip=True))
def on_changed_catalog(target, value, oldvalue, initiaor):
    """Render the changed ``catalog`` markdown *value* into sanitized,
    linkified HTML stored on ``target.catalog_html``.

    Signature matches a change-listener hook (target/value/oldvalue/initiator);
    ``initiaor`` is a pre-existing typo kept for call compatibility.
    """
    # Fixed: 'blockquote' was misspelled 'blockquate', so <blockquote>
    # elements were always stripped from the rendered HTML.
    allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i',
                    'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2', 'h3', 'p']
    target.catalog_html = bleach.linkify(
        bleach.clean(markdown(value, output_format='html'),
                     tags=allowed_tags, strip=True))
def on_changed_body(target, value, oldvalue, initiator):
    """Change-listener hook: render the markdown body into sanitized,
    linkified HTML on ``target.body_html``."""
    tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code',
            'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul',
            'h1', 'h2', 'h3', 'p']
    rendered = markdown(value, output_format='html')
    cleaned = bleach.clean(rendered, tags=tags, strip=True)
    target.body_html = bleach.linkify(cleaned)
def on_changed_body(target, value, oldvalue, initiator):
    """Change-listener hook: render the body as HTML, allowing only a small
    set of inline tags, then linkify bare URLs."""
    inline_tags = ['a', 'abbr', 'acronym', 'b', 'code', 'em', 'i',
                   'strong']
    html = bleach.clean(markdown(value, output_format='html'),
                        tags=inline_tags, strip=True)
    target.body_html = bleach.linkify(html)
def render_markdown(self):
    """Render markdown source to HTML with a tag whitelist."""
    whitelist = ['a', 'abbr', 'acronym', 'b', 'code', 'em', 'i',
                 'strong']
    # Render, sanitize against the whitelist, then linkify bare URLs.
    cleaned = bleach.clean(markdown(self.source, output_format='html'),
                           tags=whitelist, strip=True)
    self.html = bleach.linkify(cleaned)
def markdown(value):
    """
    Translate markdown to a safe subset of HTML.

    :param value: markdown source text.
    :returns: HTML marked safe for template rendering.
    """
    # list(...) keeps this working when bleach.ALLOWED_TAGS is a frozenset
    # (bleach >= 6), where ``ALLOWED_TAGS + [...]`` raises TypeError.
    allowed = list(bleach.ALLOWED_TAGS) + ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    cleaned = bleach.clean(markdown_library.markdown(value), tags=allowed)
    linkified = bleach.linkify(cleaned)
    return mark_safe(linkified)
def on_changed_body(target, value, oldvalue, initiator):
    """Change-listener hook: render the markdown body (inline tags plus
    images) into sanitized, linkified HTML on ``target.body_html``."""
    permitted = [
        'a', 'abbr', 'acronym', 'b', 'code', 'em', 'img', 'i', 'strong'
    ]
    sanitized = bleach.clean(markdown(value, output_format='html'),
                             tags=permitted, strip=True)
    target.body_html = bleach.linkify(sanitized)
def on_changed_body(target, value, oldvalue, initiator):
    """Change-listener hook: render the markdown body to HTML, strip any
    tag outside the whitelist, and linkify bare URLs."""
    whitelist = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code',
                 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul',
                 'h1', 'h2', 'h3', 'p']
    body_html = markdown(value, output_format='html')
    body_html = bleach.clean(body_html, tags=whitelist, strip=True)
    target.body_html = bleach.linkify(body_html)
def on_changed_body(target, value, oldvalue, initiator):
    """Change-listener hook: markdown -> sanitized HTML (inline tags only)
    -> linkified HTML on ``target.body_html``."""
    safe_tags = ['a', 'abbr', 'acronym', 'b', 'code', 'em', 'i',
                 'strong']
    target.body_html = bleach.linkify(
        bleach.clean(markdown(value, output_format='html'),
                     tags=safe_tags, strip=True))
def test_empty():
    """Linkifying an empty string yields an empty string."""
    result = linkify('')
    eq_('', result)
def test_simple_link():
    """Bare URLs and naked domains embedded in text become anchors."""
    cases = (
        # (raw token, expected href, expected anchor text)
        ('http://example.com', 'http://example.com', 'http://example.com'),
        ('https://example.com', 'https://example.com', 'https://example.com'),
        ('example.com', 'http://example.com', 'example.com'),
    )
    for raw, href, label in cases:
        in_(('a <a href="%s" rel="nofollow">%s</a> link' % (href, label),
             'a <a rel="nofollow" href="%s">%s</a> link' % (href, label)),
            linkify('a %s link' % raw))
def test_trailing_slash():
    """Trailing slashes are preserved in both the href and the link text."""
    for url in ('http://examp.com/',
                'http://example.com/foo/',
                'http://example.com/foo/bar/'):
        in_(('<a href="{0}" rel="nofollow">{0}</a>'.format(url),
             '<a rel="nofollow" href="{0}">{0}</a>'.format(url)),
            linkify(url))