def convert_to_text(body):
    """
    Create plain text from html.

    A newline is inserted after every ``<br>``/``<br/>`` and after every
    closing ``</div>``/``</p>`` tag, the markup is removed with
    ``striptags`` and the remaining whitespace is normalised.

    :param body: HTML markup to convert; falsy values are returned unchanged.
    :return: the text with tags stripped and whitespace normalised.
    """
    if not body:
        return body
    # Keep the matched tag (striptags removes it below) and append a newline.
    # The slash of <br> is optional so "<br>", "<br/>" and "<br />" all count.
    txt_body = re.sub(
        r'(<br\s*/?\s*>|<\s*/\s*(?:div|p)>)', '\\1\n', body,
        flags=re.IGNORECASE)
    txt_body = striptags(txt_body)
    # Non-breaking spaces (the entity or the U+00A0 character) become
    # ordinary spaces so that they are squashed with the other spaces.
    txt_body = re.sub(r'&nbsp;|\xa0', ' ', txt_body, flags=re.IGNORECASE)
    # Squash runs of spaces into a single space.
    txt_body = re.sub(r' {2,}', ' ', txt_body)
    # Use a single newline format.
    txt_body = re.sub(r'\r\n', '\n', txt_body)
    # Tabs are turned into line breaks (behaviour kept from the original).
    txt_body = re.sub(r'\t', '\n', txt_body)
    # Drop the spaces that indent a line.
    txt_body = re.sub(r'\n +', '\n', txt_body)
    # Limit consecutive newlines to a maximum of two.
    txt_body = re.sub(r'\n{3,}', '\n\n', txt_body)
    return txt_body
# Example source code for Python's striptags()
def send_mail_template(
        subject, template_name, context, recipient_list,
        from_email=settings.DEFAULT_FROM_EMAIL, fail_silently=False):
    """Render a template and send it as a multipart e-mail.

    The template is rendered with ``context``; the result passed through
    ``striptags`` is used as the message body and the rendered HTML itself
    is attached as a ``text/html`` alternative.
    """
    html_content = render_to_string(template_name, context)
    text_content = striptags(html_content)
    message = EmailMultiAlternatives(
        subject=subject,
        body=text_content,
        from_email=from_email,
        to=recipient_list,
    )
    message.attach_alternative(html_content, "text/html")
    message.send(fail_silently=fail_silently)
def _get_word_count(self):
    """Naive word counter for an article.

    Returns the number of whitespace-separated tokens left in
    ``self.rendered_content`` once its HTML tags are stripped.
    """
    # split() without an argument splits on any run of whitespace and never
    # yields empty strings, so repeated spaces, newlines and empty content
    # are not counted as words.
    return len(striptags(self.rendered_content).split())
def striptags(value):
    """Apply Django's ``striptags`` template filter to ``value``."""
    # Imported inside the function, under an alias so that it does not
    # shadow this wrapper's own name.
    from django.template.defaultfilters import striptags as _django_striptags

    return _django_striptags(value)
def extract_text(self):
    """Return the rendered content items with HTML tags stripped."""
    items = self.contentitem_set.all()
    # Rendering is done without a request object.
    rendered = render_content_items(request=None, items=items)
    return striptags(rendered)
def preview(self):
    """Return the first sentence of the post, with HTML tags stripped,
    for use as a preview blurb.

    The sources of all ``rich_text`` blocks in ``self.body`` are joined with
    spaces and stripped of tags; everything before the first ``.`` is taken
    as the first sentence and returned with a trailing ``.``. An empty
    string is returned when there is no text at all.
    """
    body_text = striptags(' '.join(
        child.value.source for child in self.body
        if child.block_type == 'rich_text'
    ))
    # Without this guard a post with no text would get "." as its preview.
    if not body_text.strip():
        return ''
    first_sentence = body_text.split('.', 1)[0]
    return first_sentence + '.'
def item_title(self, notification):
    """Return the notification's message with HTML tags stripped."""
    message = notification.message
    return striptags(message)
def get_excerpt(self):
    """Return an excerpt of ``self.content`` truncated to 55 words.

    The content is stripped of tags, HTML entities are unescaped and the
    result is passed through ``embed_videos(..., strip=True)`` before it is
    truncated.
    """
    plain_text = html.unescape(striptags(self.content))
    processed = embed_videos(plain_text, strip=True)
    return truncatewords(processed, 55)
def get_content_snippet(content, keyword, max_words=30):
    """
    Build a short text snippet of ``content`` around ``keyword``.

    HTML tags and newlines are removed from the text. If the keyword is
    found (case-insensitively), the snippet holds up to half of
    ``max_words`` (rounded up) words before it, the keyword wrapped in a
    ``<strong>`` tag, and the remaining word budget after it; the result is
    marked safe. If the keyword is empty or not found, the first
    ``max_words`` words of the text are returned as a plain string.

    :param content: text, possibly containing HTML, to take the snippet from.
    :param keyword: the term to look for and highlight.
    :param max_words: word budget for the snippet; converted with ``int()``.
    :return: the snippet.
    """
    def clean_text(content):
        """
        Removes tags, newlines and spaces from content.
        Return array of words.
        """
        text = striptags(content).replace("\n", " ")
        return [word for word in text.split(" ") if word != ""]

    max_words = int(max_words)
    match = None
    # An empty keyword would match at every position and wrap each of them
    # in an empty <strong></strong>; treat it as "no keyword" instead.
    if keyword:
        pattern = re.compile(
            r'(?P<before>.*)%s(?P<after>.*)' % re.escape(keyword),
            re.MULTILINE | re.IGNORECASE | re.DOTALL
        )
        match = pattern.search(content)
    if match:
        words = clean_text(match.group("before"))
        # -max_words // 2 parses as (-max_words) // 2, i.e. the last
        # ceil(max_words / 2) words before the keyword are kept.
        before_words = words[-max_words // 2:]
        words = clean_text(match.group("after"))
        after = " ".join(words[:max_words - len(before_words)])
        before = " ".join(before_words)
        html = "%s %s %s" % (before, striptags(keyword), after)
        # The keyword is literal text, not a pattern: escape it so that
        # characters such as "+", "(" or "." neither raise re.error nor
        # match unrelated text.
        kw_p = re.compile(r'(%s)' % re.escape(keyword), re.IGNORECASE)
        html = kw_p.sub(r"<strong>\1</strong>", html)
        # NOTE(review): Django documents that striptags output is not
        # guaranteed to be HTML-safe; confirm that content and keyword are
        # trusted before relying on mark_safe here.
        return mark_safe(html)
    return " ".join(clean_text(content)[:max_words])