def keyword_extractor(data):
    """Return at most ten keywords for the given markup.

    The input is passed through ``strip_tags`` and ``words_wo_stopwords``,
    tokenized, tagged with ``pos_tag``, filtered with
    ``filter_insignificant`` and finally fed to ``WordCloudMod``; the first
    ten keys of the generated cloud are returned. Any exception is printed
    in red and an empty list is returned instead.
    """
    try:
        plain = words_wo_stopwords(strip_tags(data))
        # TODO: tags are stripped a second time here; this duplicated work
        # should be removed.
        tokens = word_tokenize(strip_tags(plain))
        significant = filter_insignificant(pos_tag(tokens))
        cloud = WordCloudMod().generate(" ".join(significant))
        return list(cloud.keys())[:10]
    except Exception as err:
        print(colored.red("At keywords extraction {}".format(err)))
        return []
# TODO definitely can be better if we knew where content is
# Example source code for Python's strip_tags()
def get_events(event_status):
    """Return the Perth Django Users Group events that have ``event_status``.

    The result is a list of dicts, one per event, in the reverse of the
    order in which the Meetup client returned them.
    """
    # NOTE(review): the API key is hard-coded here; consider loading it from
    # configuration instead.
    client = meetup.api.Client('73c42797541a6c207a2a2b41262a66')
    group = client.GetGroup({'urlname': 'Perth-Django-Users-Group'})
    response = client.GetEvents({'group_id': group.id, 'status': event_status})

    events = []
    for event in reversed(response.results):
        events.append({
            'group_id': group.id,
            'event_id': event['id'],
            'event_name': event['name'],
            'event_address': event['venue']['address_1'],
            'event_description': event['description'],
            # Tag-free, ASCII-only variant of the description.
            'og_event_description': strip_tags(event['description']).encode('ascii', 'ignore'),
            'event_yes_rsvp_count': event['yes_rsvp_count'],
            # event['time'] is divided by 1000 before conversion, i.e. it is
            # treated as a millisecond timestamp.
            'event_datetime': datetime.datetime.fromtimestamp(
                event['time'] / 1000.0,
                pytz.timezone('Australia/Perth'),
            ),
        })
    return events
def send_email(to, kind, **kwargs):
    """Render the ``emails/<kind>/`` templates and send them to ``to``.

    The template context holds the current site and ``STATIC_URL``, updated
    with the optional ``context`` keyword argument. The subject is prefixed
    with the site name in square brackets; the plain-text body is the HTML
    message with its tags stripped, and the HTML is attached as an
    alternative.
    """
    site = Site.objects.get_current()
    template_context = {"current_site": site, "STATIC_URL": settings.STATIC_URL}
    template_context.update(kwargs.get("context", {}))

    site_name = site.name
    subject_line = render_to_string("emails/%s/subject.txt" % kind, template_context).strip()
    subject = "[%s] %s" % (site_name, subject_line)

    html_body = render_to_string("emails/%s/message.html" % kind, template_context)
    text_body = strip_tags(html_body)

    message = EmailMultiAlternatives(subject, text_body, settings.DEFAULT_FROM_EMAIL, to)
    message.attach_alternative(html_body, "text/html")
    message.send()
def one_line_address(self):
    """
    Put the address all on one line.

    Returns the non-blank street address lines followed by the city, state,
    zipcode and country name, joined with ", ". Returns an empty string when
    the street address, city, state or zipcode is missing.
    """
    import re
    from django.utils.html import strip_tags

    if not (self.street_address and self.city and self.state and self.zipcode):
        return ""

    # The street address is a free-text field, so clean it up first:
    # line-break tags become newlines (matched case-insensitively so that
    # <BR> and <Br/> work as well as <br>), then remaining markup is removed.
    address = re.sub(r'<br\s*/?>', '\n', self.street_address, flags=re.IGNORECASE)
    address = strip_tags(address)

    # Skip blank lines (e.g. from a trailing or doubled <br>) so the result
    # never contains empty ", , " segments.
    address_parts = [line.strip() for line in address.splitlines() if line.strip()]
    address_parts.extend([
        self.city,
        self.state,
        self.zipcode,
        unicode(self.country.name),
    ])
    return ", ".join(address_parts)
def send(self):
    """Send the payment email with its attachments; return ``mail.send()``'s result."""
    html_body = self.get_body()
    # The plain-text body must not be empty, see
    # http://stackoverflow.com/questions/14580176/confusion-with-sending-email-in-django
    message = EmailMultiAlternatives(
        subject=self.get_subject(),
        body=strip_tags(html_body),
        to=self.get_recipient_list(),
        cc=self.get_cc_list(),
        bcc=self.get_bcc_list(),
    )
    message.attach_alternative(html_body, 'text/html')
    # Each attachment entry supplies the two positional arguments of
    # attach_file() in its first two items.
    for entry in self.attachments:
        message.attach_file(entry[0], entry[1])
    return message.send()
def get_autopost_form(self, request, obj):
    """Build the autopost form for ``obj``.

    The initial text comes from ``get_autopost_text``: tags stripped,
    unescaped, trimmed, newlines normalized via ``re_newlines`` and cut to
    ``conf.TEXT_MAX_LENGTH`` characters. On POST the form is bound to the
    request's POST data and files; otherwise it is unbound.
    """
    text = unescape(strip_tags(self.get_autopost_text(obj))).strip()
    text = re_newlines.sub('\n', text)[:conf.TEXT_MAX_LENGTH]

    # Only a POST request supplies data to bind the form to.
    bound_args = (request.POST, request.FILES) if request.method == 'POST' else ()
    return AutpostForm(
        *bound_args,
        initial={
            'networks': conf.ALLOWED_NETWORK_NAMES,
            'text': text,
        },
        prefix=AUTOPOST_FORM_PREFIX
    )
def send(request, receivers, subject, message, fail_silently=True):
    """Send ``message`` as an HTML email with a generated plain-text part.

    Returns ``True`` immediately when ``receivers`` is empty. A single
    string receiver is wrapped into a list. ``subject`` is formatted with
    the current site's domain as ``domain``.
    """
    if not receivers:
        return True

    recipient_list = [receivers] if isinstance(receivers, str) else receivers

    # Substitute the site domain into the subject.
    current_site = get_current_site(request)
    formatted_subject = subject.format(domain=current_site.domain)

    # Plain-text alternative: drop the tags, then normalize line breaks.
    text_body = strip_tags(message)
    text_body = re_newline_spaces.sub('\n', text_body)
    text_body = re_newlines.sub('\n\n', text_body)

    send_mail(
        subject=formatted_subject,
        message=text_body,
        from_email=settings.DEFAULT_FROM_EMAIL,
        recipient_list=recipient_list,
        html_message=message,
        fail_silently=fail_silently,
    )
def send_activation_email(request, user):
    """Create an ``Activation`` record for ``user`` and email the code.

    A random 20-character code is saved on a new ``Activation`` and, with
    the current site's domain, rendered into
    ``email/activation_profile.html``. The message goes to ``user.email``
    with a tag-stripped plain-text body and the HTML as an alternative.
    """
    subject = _('Profile Activation')
    sender = settings.DEFAULT_FROM_EMAIL
    site_domain = get_current_site(request).domain
    code = get_random_string(20)

    activation = Activation()
    activation.code = code
    activation.user = user
    activation.save()

    template_context = {'domain': site_domain, 'code': code}
    html_content = render_to_string(
        'email/activation_profile.html', context=template_context, request=request)

    message = EmailMultiAlternatives(subject, strip_tags(html_content), sender, [user.email])
    message.attach_alternative(html_content, 'text/html')
    message.send()
def send_activation_change_email(request, user, new_email):
    """Create an ``Activation`` for an email change and mail the code.

    A random 20-character code is stored on a new ``Activation`` together
    with ``user`` and ``new_email``, then rendered (with the current site's
    domain) into ``email/change_email.html``.
    """
    subject = _('Change email')
    sender = settings.DEFAULT_FROM_EMAIL
    site_domain = get_current_site(request).domain
    code = get_random_string(20)

    activation = Activation()
    activation.code = code
    activation.user = user
    activation.email = new_email
    activation.save()

    template_context = {'domain': site_domain, 'code': code}
    html_content = render_to_string(
        'email/change_email.html', context=template_context, request=request)

    # NOTE(review): the message is addressed to user.email, not new_email --
    # confirm this is the intended recipient.
    message = EmailMultiAlternatives(subject, strip_tags(html_content), sender, [user.email])
    message.attach_alternative(html_content, 'text/html')
    message.send()
def league_document(request):
    """Return a league document's name and content as JSON.

    Reads the ``league``, ``type`` and ``strip_html`` query parameters.
    Responds with HTTP 400 when ``league`` or ``type`` is missing, and with
    a ``not_found`` JSON payload when no matching document exists. When
    ``strip_html`` equals ``'true'`` the content has its tags removed.
    """
    try:
        query = request.GET
        league_tag = query.get('league', None)
        type_ = query.get('type', None)
        strip_html = query.get('strip_html', None) == 'true'
    except ValueError:
        return HttpResponse('Bad request', status=400)

    if not (league_tag and type_):
        return HttpResponse('Bad request', status=400)

    league_doc = LeagueDocument.objects.filter(league__tag=league_tag, type=type_).first()
    if league_doc is None:
        return JsonResponse({'name': None, 'content': None, 'error': 'not_found'})

    document = league_doc.document
    raw_content = document.content
    content = strip_tags(raw_content) if strip_html else raw_content
    return JsonResponse({
        'name': document.name,
        'content': content
    })
def site_email(to, template, context, subject):
    """Render ``emails/<template>.html`` with ``context`` and send it to ``to``.

    The rendered HTML is sent as the HTML part; its tag-stripped text is the
    plain-text body. The sender is ``settings.SENDER_EMAIL``.
    """
    sender = settings.SENDER_EMAIL
    html_message = get_template('emails/' + template + '.html').render(context)
    send_mail(
        subject=subject,
        message=strip_tags(html_message),
        from_email=sender,
        recipient_list=to,
        html_message=html_message
    )
def test_default_jumbotron(self):
    """
    Default jumbotron rendering
    """
    page = self.create_page(slug='page1', title="Hello", content="<p>test</p>", button1_url='http://example.org/', button1_title="GO")
    template = Template('{% load fluent_contents_tags %}{% page_placeholder "content" %}')
    request = RequestFactory().get("/", HTTP_HOST='example.org')
    rendered = template.render(Context({'page': page, 'request': request}))

    # The visible text alone must slugify to title, body and button label.
    self.assertEqual(slugify(strip_tags(rendered)), 'hello-test-go')

    # The full markup must match the expected jumbotron structure.
    expected = '''<div class="jumbotron">
<div class="container">
<h1>Hello</h1>
<p>test</p>
<p><a class="btn btn-primary btn-lg" href="http://example.org/" role="button">GO</a></p>
</div>
</div>'''
    self.assertHTMLEqual(rendered.strip(), expected)
def send_email(cls):
    """
    Send the email described by ``cls.body``.

    ``cls.body`` supplies the ``template``, ``subject``, ``to``,
    ``from_email`` and ``context`` entries. The template
    ``email/<template>.html`` is rendered for the HTML part and its
    tag-stripped text is used as the plain-text body.
    :return:
    """
    template, subject, to, from_email, context = (
        cls.body[key]
        for key in ('template', 'subject', 'to', 'from_email', 'context')
    )
    html_message = get_template('email/' + template + '.html').render(Context(context))
    send_mail(
        subject=subject,
        message=strip_tags(html_message),
        from_email=from_email,
        recipient_list=to,
        html_message=html_message
    )
def test_default_pager(self):
    """
    Default pager rendering
    """
    first = self.create_page(slug='page1', position=1)
    second = self.create_page(slug='page2', position=2)
    third = self.create_page(slug='page3', position=3)
    template = Template('{% load fluent_contents_tags %}{% page_placeholder "content" %}')
    request = RequestFactory().get("/", HTTP_HOST='example.org')

    def render(page):
        # Render the placeholder template for a single page.
        return template.render(Context({'page': page, 'request': request}))

    # Standard pagers
    first_html, second_html, third_html = render(first), render(second), render(third)

    self.assertEqual(strip_tags(first_html).strip(), 'page2 →')
    self.assertEqual(strip_tags(second_html).strip(), '← page1\n page3 →')
    self.assertEqual(strip_tags(third_html).strip(), '← page2')

    # The middle page links to both of its neighbours.
    self.assertTrue('<li class="previous"><a href="/foo/page1/">' in second_html)
    self.assertTrue('<li class="next"><a href="/foo/page3/">' in second_html)
def test_no_arrows(self):
    """
    Pager without arrows
    """
    first = self.create_page(slug='page1', position=1, show_arrows=False)
    second = self.create_page(slug='page2', position=2, show_arrows=False)
    third = self.create_page(slug='page3', position=3, show_arrows=False)
    template = Template('{% load fluent_contents_tags %}{% page_placeholder "content" %}')
    request = RequestFactory().get("/", HTTP_HOST='example.org')

    def render(page):
        # Render the placeholder template for a single page.
        return template.render(Context({'page': page, 'request': request}))

    first_html, second_html, third_html = render(first), render(second), render(third)

    self.assertEqual(strip_tags(first_html).strip(), 'page2')
    self.assertEqual(strip_tags(second_html).strip(), 'page1\n page3')
    self.assertEqual(strip_tags(third_html).strip(), 'page2')
def test_other_titles(self):
    """
    Pager with custom title
    """
    first = self.create_page(slug='page1', position=1, previous_title='N/A', next_title='PAGE2')
    second = self.create_page(slug='page2', position=2, previous_title='PAGE1', next_title='PAGE3')
    third = self.create_page(slug='page3', position=3, previous_title='PAGE2', next_title='N/A')
    template = Template('{% load fluent_contents_tags %}{% page_placeholder "content" %}')
    request = RequestFactory().get("/", HTTP_HOST='example.org')

    def render(page):
        # Render the placeholder template for a single page.
        return template.render(Context({'page': page, 'request': request}))

    first_html, second_html, third_html = render(first), render(second), render(third)

    self.assertEqual(strip_tags(first_html).strip(), 'PAGE2 →')
    self.assertEqual(strip_tags(second_html).strip(), '← PAGE1\n PAGE3 →')
    self.assertEqual(strip_tags(third_html).strip(), '← PAGE2')
def restore_account(username, temp_password, recipient):
    """
    Send the account-restore email (described upstream as a Celery task).

    Renders ``mails/account_restore.html`` with the username and temporary
    password, and sends it with a tag-stripped plain-text body plus the
    HTML as an alternative.
    :param str username: The restored username.
    :param str temp_password: The temporary password for restored username.
    :param str recipient: The mail recipient.
    """
    template_context = {'username': username, 'password': temp_password}
    html_content = render_to_string('mails/account_restore.html', template_context)
    # NOTE(review): the subject text looks mis-encoded in this copy of the
    # file -- confirm it against the original source.
    subject = '?????????????? ????????'
    message = EmailMultiAlternatives(subject, strip_tags(html_content), to=[recipient])
    message.attach_alternative(html_content, 'text/html')
    message.send()
# ----------------------------------------------------------------------------------------------------------------------
def changed_password(username, recipient):
    """
    Send the password-changed notice (described upstream as a Celery task).

    Renders ``mails/password_changed.html`` with the username and sends it
    with a tag-stripped plain-text body plus the HTML as an alternative.
    :param str username: The restored username.
    :param str recipient: The mail recipient.
    """
    template_context = {'username': username}
    html_content = render_to_string('mails/password_changed.html', template_context)
    # NOTE(review): the subject text looks mis-encoded in this copy of the
    # file -- confirm it against the original source.
    subject = '????????? ?????? ????????'
    message = EmailMultiAlternatives(subject, strip_tags(html_content), to=[recipient])
    message.attach_alternative(html_content, 'text/html')
    message.send()
# ----------------------------------------------------------------------------------------------------------------------
def clean_message(self):
    """Strip message of HTML tags and leading/trailing whitespace.

    Returns the cleaned ``message`` value. Whitespace is trimmed after the
    tags are removed, so padding that sat inside the markup (for example
    ``<p> hi </p>``) does not survive in the result.
    """
    value = self.cleaned_data['message']
    return strip_tags(value).strip()
def clean_subject(self):
    """Strip subject of HTML tags and leading/trailing whitespace.

    Returns the cleaned ``subject`` value. Whitespace is trimmed after the
    tags are removed, so padding that sat inside the markup (for example
    ``<b> hi </b>``) does not survive in the result.
    """
    value = self.cleaned_data['subject']
    return strip_tags(value).strip()
def add_help_attrs(self, widget=None):
    """Give the widget a ``title`` attribute derived from the field help.

    Falls back to ``self.widget`` when no widget is passed. Checkbox inputs
    are left untouched, and an existing ``title`` is kept as it is.
    """
    target = self.widget if widget is None else widget
    if isinstance(target, CheckboxInput):
        return
    # Tag-free, escaped help text is used only when no title is set yet.
    fallback_title = escape(strip_tags(self.field_help))
    target.attrs['title'] = target.attrs.get('title', fallback_title)
def put_inside_label(self, html):
    """Wrap ``html`` followed by the field's label text in a rendered label.

    The label's title is the field help with tags stripped and escaped.
    """
    combined = '{field} {label}'.format(field=html, label=self.field.label)
    safe_content = mark_safe(combined)
    target_id = self.field.id_for_label
    title = escape(strip_tags(self.field_help))
    return render_label(content=safe_content, label_for=target_id, label_title=title)
def add_error_attrs(self):
    """Append the tag-stripped field errors to the widget's ``title``."""
    current_title = self.widget.attrs.get('title', '')
    error_text = ' '.join(strip_tags(error) for error in self.field_errors)
    # Joined with a single space; trimmed so that an empty title or an empty
    # error list leaves no stray whitespace.
    self.widget.attrs['title'] = (current_title + (' ' + error_text)).strip()
def get_object_display(self, attachment):
    """Return the translated "Volume ... on instance ..." label for ``attachment``.

    The instance name comes from ``get_attachment_name`` and has its HTML
    tags stripped before interpolation.
    """
    raw_instance_name = get_attachment_name(self.request, attachment)
    substitutions = {
        "volume_name": attachment['volume_name'],
        "instance_name": html.strip_tags(raw_instance_name),
    }
    template = _("Volume %(volume_name)s on instance %(instance_name)s")
    return template % substitutions
def get_meta_description(self):
    """Return the search description, or the first 20 words of the tag-free body."""
    description = self.search_description
    if description:
        return description
    return truncatewords(strip_tags(self.body), 20)
def collapse_and_strip_tags(text: str) -> str:
    '''
    Remove the HTML tags from the given string and collapse every run of
    newlines into a single newline.

    Example:

        >>> collapse_and_strip_tags('\\n\\n<p>hi james</p>\\n\\n\\n')
        '\\nhi james\\n'
    '''
    without_tags = strip_tags(text)
    return re.sub(r'\n+', '\n', without_tags)
def description_summary(self):
    """Return a short plain-text summary of the description.

    The first line of the description is rendered as Markdown, its tags are
    stripped and the result trimmed. Text longer than 140 characters is cut
    to 140 and suffixed with ``'...'``. Returns ``''`` when there is no
    description.
    """
    # str.split() always yields at least one element, so emptiness has to be
    # checked on the description itself; this also covers a None value.
    if not self.description:
        return ''
    first_source_line = self.description.split('\n')[0]
    first_line = strip_tags(markdown.markdown(first_source_line)).strip()
    if len(first_line) > 140:
        first_line = first_line[:140] + '...'
    return first_line
def save(self, *args, **kwargs):
    """Save the post, filling in a missing excerpt from the body first."""
    if not self.excerpt:
        # Use the first 74 characters of the tag-free body as the excerpt.
        plain_body = strip_tags(self.body)
        self.excerpt = plain_body[:74]
    super(Post, self).save(*args, **kwargs)
def clean_html(value):
    """Return ``value`` with HTML tags and pilcrow ('¶') characters removed."""
    without_tags = strip_tags(value)
    return without_tags.replace('¶', '')
def strip_all(self, html):
    """
    Reduce html content to plain text.

    The markup is run through ``Cleaner`` (with ``style=True`` and this
    object's ``kill_tags``), the remaining tags and pilcrow characters are
    removed, and whitespace runs are collapsed to single spaces. Returns an
    empty string when a ``ParserError`` is raised.
    """
    try:
        sanitized = Cleaner(style=True, kill_tags=self.kill_tags).clean_html(html)
        plain = strip_tags(sanitized).replace('¶', '')
        return " ".join(plain.split())
    except ParserError:
        return ""