def format_text(feed):
    """
    Convert a feed's HTML description to Markdown and append a footer.

    :param feed: Feed to format (reads feed.desc and feed.link)
    :return: formatted Markdown text
    """
    body = html2text.html2text(feed.desc)
    header = '[Link zum PietSmiet.de-Artikel](' + feed.link + ')'
    footer = ('--- \n[Code](https://github.com/PietsmietApp/pietsmiet_xposter) | ' +
              '*Auch als Push-Benachrichtigung in der [Community App für Pietsmiet](' +
              'https://play.google.com/store/apps/details?id=de.pscom.pietsmiet&referrer=utm_source%3Dreddit' +
              '%26utm_medium%3Duploadplan)* ')
    return header + '\n\n' + body + '\n\n' + footer
Example source code using the Python html2text() class
def send_email(to_address, subject, html_body):
    """Send an HTML email (plain-text body derived via html2text); skipped in debug mode.

    NOTE(review): the double-underscore attribute access on EmailService only
    resolves if this function is defined inside that class — confirm placement.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8')
        msg.Subject = subject
        msg.Html = html_body
        # plain-text alternative generated from the HTML body
        msg.Body = html2text.html2text(html_body)
        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        print("Error sending mail: {}".format(x))
def checkCalendarForUpcomingEvents():
    """
    Check the calendar for events starting within the configured timespan
    and post each one to its subcalendar's Mattermost channel.
    """
    window_start = datetime.datetime.now()
    window_end = window_start + datetime.timedelta(minutes=calendarSettings['TimespanToCheck'])
    stamp = "%d.%m.%Y %H:%M"
    successful, res = showAgenda('', window_start.strftime(stamp), window_end.strftime(stamp), True)
    if not successful:
        return
    # presumably "KIT time" is CET/CEST; the Copenhagen zone matches that — TODO confirm
    tz = EWSTimeZone.timezone('Europe/Copenhagen')
    for item in res:
        eventContent = '### **{0}**\nTime: {1} - {2} (KIT time)\nDetails: {3}Location: {4}\n\n'.format(
            item.subject,
            item.start.astimezone(tz).strftime('%H:%M'),
            item.end.astimezone(tz).strftime('%H:%M'),
            html2text.html2text(item.body),
            item.location)
        for subcalendar in item.categories:
            try:
                mattermostHook.send(eventContent, channel=subcalendar)
            except Exception as e:
                # fall back to the default channel, noting the failure
                messageContent = eventContent + '\n Error occured: \n {0} \n'.format(e.__doc__)
                mattermostHook.send(messageContent, channel=mattermostSettings['DefaultChannel'])
def dl_scripts():
    """Download every linked play from BASE_URL as plain text into ./plays/."""
    index = requests.get(BASE_URL)
    soup = BeautifulSoup(index.text, "html.parser")
    os.makedirs("plays", exist_ok=True)
    # the first 2 and last 7 anchors are navigation, not plays
    for anchor in soup.find_all("a")[2:-7]:
        slug = anchor.get("href").split("/")[0]
        name = anchor.text.strip().replace(" ", "_").replace("\n", "_")
        outfile = "plays/" + name + ".txt"
        page = requests.get(BASE_URL + "/" + slug + "/full.html")
        # blockquote -> p so html2text keeps dialogue inline
        script = html2text(page.text.replace("blockquote", "p"))
        # drop everything before the first act heading
        script = script[script.index("### ACT I"):]
        with open(outfile, "w") as handle:
            handle.write(script)
email_service.py — file source
Project: cookiecutter-pyramid-talk-python-starter
Author: mikeckennedy
Project source code
File source code
Views: 21
Favorites: 0
Likes: 0
Comments: 0
def send_email(to_address, subject, html_body):
    """Build and send a mailer.Message with HTML plus a text fallback.

    In debug mode the message is constructed but never sent.
    NOTE(review): EmailService.__from_address relies on class-private name
    mangling — verify this function lives inside EmailService.
    """
    try:
        smtp_server = EmailService.create_smtp_server()
        outgoing = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8')
        outgoing.Subject = subject
        outgoing.Html = html_body
        outgoing.Body = html2text.html2text(html_body)
        if not EmailService.__is_debug_mode:
            print("Sending message (live!)")
            smtp_server.send(outgoing)
        else:
            print("Skipping send, email is in dev mode.")
    except Exception as x:
        print("Error sending mail: {}".format(x))
def send_email(to_address, subject, html_body):
    """Send *html_body* to *to_address* with the given subject.

    A plain-text alternative is derived from the HTML. Errors are logged,
    never raised; in debug mode nothing is sent.
    """
    try:
        connection = EmailService.create_smtp_server()
        email = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8')
        email.Subject = subject
        email.Html = html_body
        email.Body = html2text.html2text(html_body)
        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
            return
        print("Sending message (live!)")
        connection.send(email)
    except Exception as x:
        print("Error sending mail: {}".format(x))
def build_mime_text(recipients, subject, message):
    """
    Puts message data into MIME format
    :param recipients: array of email addresses to send email to
    :param subject: subject of email
    :param message: body of email
    :return MIMEMultipart object (delivered via Tornado-style ``raise Return``)
    """
    body = MIMEMultipart('alternative')
    body['Subject'] = subject
    body['From'] = options.smtp_from
    body['To'] = ",".join(recipients)
    # plain part first: clients prefer the last alternative they support,
    # so HTML-capable clients render the second (html) part
    body.attach(MIMEText(html2text.html2text(message), 'plain'))
    body.attach(MIMEText(message, 'html'))
    # Tornado generator-coroutine return convention
    raise Return(body)
def staff_reminder(request):  # pylint: disable=invalid-name
    """Email staff a reminder about *request*, if reminders are enabled.

    The email body comes from a FlatPage template keyed on the request's
    lowercase class name.
    """
    if not config.STAFF_EMAIL_REMINDER:
        return
    request_type = type(request).__name__.lower()
    flatemail = FlatPage.objects.get(
        url="/email/template/{}/staff/reminder/".format(request_type)
    )
    jinja_context = Context({
        request_type: request,
        "protocol": "https",
        "site": Site.objects.get(id=SITE_ID),
        "FELLOWS_MANAGEMENT_EMAIL": config.FELLOWS_MANAGEMENT_EMAIL,
    })
    html = Template(flatemail.content).render(jinja_context)
    mail_staffs(
        flatemail.title,
        html2text(html),
        html_message=html,
        fail_silently=False
    )
def ENMLtoText(contentENML):
    """Convert Evernote ENML (UTF-8 bytes) to plain text (UTF-8 bytes).

    Python 2 code: relies on str.decode / unicode round-trips.
    Checkboxes become "[x]"/"[ ]"; Markdown-ish output comes from html2text.
    """
    soup = BeautifulSoup(contentENML.decode('utf-8'))
    # Unwrap a <p> that wraps a list item's content so lists stay compact.
    for section in soup.select('li > p'):
        section.replace_with( section.contents[0] )
    # Drop a trailing <br> inside a list item: either it is followed (two
    # siblings ahead) by another list, or it is the last node — extract both.
    for section in soup.select('li > br'):
        if section.next_sibling:
            next_sibling = section.next_sibling.next_sibling
            if next_sibling:
                if next_sibling.find('li'):
                    section.extract()
            else:
                section.extract()
    # Project helper: normalizes checklist markup in place — TODO confirm contract.
    Editor.checklistInENMLtoSoup(soup)
    # Render en-todo checkboxes as text markers (checked ones first).
    for section in soup.findAll('en-todo', checked='true'):
        section.replace_with('[x]')
    for section in soup.findAll('en-todo'):
        section.replace_with('[ ]')
    content = html2text.html2text(str(soup).decode('utf-8'), '', 0)
    # Strip trailing spaces and normalize line endings to the platform's.
    content = re.sub(r' *\n', os.linesep, content)
    return content.encode('utf-8')
def markdown_db_migrate():
    '''Perform a migration of the app long descriptions from HTML to
    Markdown for existing database records'''
    with app.app_context():
        rows = db.engine.execute('SELECT id, long_description FROM "app";').fetchall()
        update_stmt = text('''
            UPDATE app SET long_description=:long_description
            WHERE id=:id''')
        for row in rows:
            if not row.long_description:
                continue
            db.engine.execute(update_stmt,
                              long_description=html2text(row.long_description),
                              id=row.id)
def _handle_anime(entry):
    """Build a Discord embed for a MyAnimeList anime search result entry."""
    embed = discord.Embed(title=entry.title.string)
    embed.url = BASE_URL_MYANIMELIST.format("anime", entry.id.string)
    # (label, xml node) pairs, added in display order
    fields = (
        ("ID", entry.id),
        ("Synonyms", entry.synonyms),
        ("Episodes", entry.episodes),
        ("Score", entry.score),
        ("Type", entry.type),
        ("Status", entry.status),
        ("Start date", entry.start_date),
        ("End date", entry.end_date),
    )
    for label, node in fields:
        embed.add_field(name=label, value=node.string)
    # synopsis arrives as HTML; render it as plain text
    embed.description = html2text.html2text(entry.synopsis.string)
    return embed
def _handle_manga(entry):
    """Build a Discord embed for a MyAnimeList manga search result entry."""
    embed = discord.Embed(title=entry.title.string)
    embed.url = BASE_URL_MYANIMELIST.format("manga", entry.id.string)
    # (label, xml node) pairs, added in display order
    for label, node in (
            ("ID", entry.id),
            ("Synonyms", entry.synonyms),
            ("Chapters", entry.chapters),
            ("Volumes", entry.volumes),
            ("Score", entry.score),
            ("Type", entry.type),
            ("Status", entry.status),
            ("Start date", entry.start_date),
            ("End date", entry.end_date)):
        embed.add_field(name=label, value=node.string)
    # synopsis arrives as HTML; render it as plain text
    embed.description = html2text.html2text(entry.synopsis.string)
    return embed
def send_html_email(to_addr, **kwargs):
    """Render subject/body templates and send a multipart (text+HTML) email.

    Required kwargs: data_dict, subject_template, email_template.
    """
    data = kwargs['data_dict']
    rendered_subject = remove_newlines(render_to_string(kwargs['subject_template'], data))
    subject = "{} {}".format(settings.EMAIL_TAG, rendered_subject)
    html_body = render_to_string(kwargs['email_template'], data)
    send_mail(subject=subject,
              message=html2text.html2text(html_body),
              from_email=settings.DEFAULT_FROM_EMAIL,
              recipient_list=to_addr,
              fail_silently=True,
              html_message=html_body)
def load_active_text(soup):
    """Return (id, markdown) for the <text active=1> entry inside *soup*."""
    active = soup.select("text[active=1]")[0]
    inner = BeautifulSoup(active.find("content").get_text(), "lxml")
    # unwrap the parent of anchors inside <code> so code spans stay plain
    for anchor in inner.select("code a"):
        anchor.parent.unwrap()
    return active["id"], html2text(str(inner))
def add_localization(language, exercise_id, config_path):
    """Append a localized text (Markdown read from stdin) to an exercise.

    :param language: locale code for the new text
    :param exercise_id: id of the exercise to update
    :param config_path: optional config file path (defaults to ./import-config.yml)
    """
    config_file = Path.cwd() / (config_path or "import-config.yml")
    config = Config.load(config_file)
    api = ApiClient(config.api_url, config.api_token)
    exercise = api.get_exercise(exercise_id)
    localized = {
        "locale": language,
        "text": html2text(sys.stdin.read()),
    }
    exercise["localizedTexts"].append(localized)
    api.update_exercise(exercise_id, exercise)
def get_website_languages(self,json_data):
url_language_dictionary = {}
url_count = 0
for article in json_data:
for url in json_data[article]:
url_count += 1
# print url_count
if url in url_language_dictionary:
continue
# start a timeout counter
signal.alarm(10)
try:
html = urllib.urlopen(url)
encoding = html.headers.getparam('charset')
if encoding is None:
encoding = chardet.detect(html.read())['encoding']
encoded_html = unicode(html.read(),encoding , errors='replace')
markup_text = html2text.html2text(encoded_html)
html_from_markup = markdown(markup_text)
text = ''.join(BeautifulSoup(html_from_markup,"lxml").findAll(text=True))
language = detect(text)
url_language_dictionary[url] = language
except TimeoutException:
print "timeout for: " + url
except Exception as exception:
print "Continue after " + exception.__class__.__name__ + " for URL: " + url
continue
return url_language_dictionary
def converthtml2text(html):
    """Convert *html* to flat plain text, stripping html2text's markup tokens."""
    # configure html2text for unwrapped, link/image-free output
    html2text.BODY_WIDTH = 0
    html2text.IGNORE_ANCHORS = True
    html2text.IGNORE_IMAGES = True
    flat = html2text.html2text(html)
    # remove markup html2text emits: | for bold, ** for italic, # for header, *** for hr
    for token in ("|", "**", "# ", "* * *"):
        flat = flat.replace(token, "")
    # collapse runs of spaces until the text stops shrinking
    while True:
        shorter = flat.replace("  ", " ")
        if len(shorter) == len(flat):
            break
        flat = shorter
    # trim spaces touching newlines
    flat = flat.replace("\n ", "\n")
    flat = flat.replace(" \n", "\n")
    # collapse empty lines until the text stops shrinking
    while True:
        shorter = flat.replace("\n\n", "\n")
        if len(shorter) == len(flat):
            break
        flat = shorter
    return flat
# sends to OCR a PDF file
# the text file is stored in the folder targetpath
# returns the path of the output txt file
# uses Abby FineReader Hot folder
# if text file already exists (previously OCR), does not OCR again
# can be replaced with other method if necessary
# returns a tuple
# 1st element - operation code (ERROR, CREATED, EXISTS)
# 2nd element - error message or ocr file path
def html2text(s):
    """Strip Evernote <en-media> open/close tags from *s*, then convert via h2t."""
    cleaned = re.sub('</*en-media[^>]*?>', '', s)
    return h2t(cleaned)
def process(input, entities):
    """Look up a book on Goodreads and build a Messenger button template.

    :param input: original user input, echoed back in the output dict
    :param entities: NLP entities; entities['book'][0]['value'] is the title
    :return: dict with 'success' plus either 'input'/'output' or 'error_msg'
    """
    output = {}
    try:
        book_title = entities['book'][0]['value']
        # cache Goodreads responses for a day to avoid hammering the API
        with requests_cache.enabled('book_cache', backend='sqlite', expire_after=86400):
            response = requests.get(
                'https://www.goodreads.com/book/title.xml?key=' + GOODREADS_ACCESS_TOKEN + '&title=' + book_title)
        data = ElementTree.fromstring(response.content)
        book_node = data.find('book')
        author = book_node.find('authors').find('author').find('name').text
        title = book_node.find('title').text
        # description is HTML; flatten to plain text
        description = html2text(book_node.find('description').text)
        average_rating = book_node.find('average_rating').text
        link = book_node.find('link').text
        goodreads_attribution = '- Powered by Goodreads'
        template = TextTemplate()
        template.set_text('Title: ' + title + '\nAuthor: ' + author + '\nDescription: ' + description)
        template.set_post_text('\nAverage Rating: ' + average_rating + ' / 5' + '\n' + goodreads_attribution)
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('Goodreads Link', link)
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; any lookup/parse failure yields a friendly error
        error_message = 'I couldn\'t find any book matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n - book timeline'
        error_message += '\n - harry potter book plot'
        error_message += '\n - little women book rating'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
def format_text(html):
    """Convert *html* to cleaned-up plain text with markdown stripped."""
    markdown_text = html2text.html2text(html, bodywidth=0)
    collapsed = markdown_text.replace('\n\n', '\n')
    unescaped = HTMLParser.HTMLParser().unescape(collapsed)
    plain = strip_markdown.strip(unescaped)
    # drop trailing spaces before newlines, then squeeze triple blank lines
    plain = re.sub(' +\n', '\n', plain).strip()
    return re.sub('\n\n\n', '\n\n', plain)
def sentenceTokenize():
    """Tokenize the posted 'sentences' form field (HTML stripped first)."""
    plain = html2text.html2text(request.form['sentences'])
    return buildResponse.sentPlainText(nlp.sentenceTokenize(plain))
def posTagAndLabel():
    """POS-tag and label the posted 'sentences' form field (HTML stripped first)."""
    raw = request.form['sentences']
    cleaned = html2text.html2text(raw)
    tagged = nlp.posTagAndLabel(cleaned)
    return buildResponse.buildJson(tagged)
def send(self, force=False):
    """Render and send this mailer's message; no-op when disabled unless forced."""
    if not (self.enabled or force):
        return
    subject = six.text_type(
        DjangoTemplate(self.subject).render(Context(self.variables)))
    html = self.compile()
    # plain-text alternative derived from the compiled HTML
    self._send(subject, html2text.html2text(html), settings.HAPPYMAILER_FROM,
               recipient_list=self.recipients(),
               html_message=html, fail_silently=False)
def main():
    """Speak shared text/URL (Pythonista share extension) or the clipboard."""
    speech.stop()
    if appex.is_running_extension():
        text = appex.get_text()
        url = appex.get_url()
    else:
        console.hud_alert('Reading clipboard')
        text = clipboard.get()
        url = None
    if url is None:
        # no explicit URL — fall back to the first URL found inside the text
        try:
            url = [match[0] for match in GRUBER_URLINTEXT_PAT.findall(text)][0]
        except:
            pass
    if url is not None:
        console.hud_alert('Reading: ' + url)
        h = html2text.HTML2Text()
        try:
            r = requests.get(
                url=url,
                headers={"User-agent": "Mozilla/5.0{0:06}".format(random.randrange(999999))})
        except requests.ConnectionError as e:
            console.alert('Unable to connect to url.')
            return True
        # NOTE(review): .decode on response text is Python-2 era — confirm runtime
        html_content = r.text.decode('utf-8')
        text = html2text.html2text(html_content)
    else:
        console.hud_alert('Reading text: ' + str(text))
    if text:
        speech.say(text)
        stop = console.alert('Done?', hide_cancel_button=True, button1='OK')
        speech.stop()
    else:
        console.hud_alert('No text found.')
def main():
    """Fetch a URL (from the share sheet or clipboard), convert it to
    Markdown, copy the result to the clipboard, and optionally open Evernote.

    Pythonista share-extension script.
    """
    if appex.is_running_extension():
        url = appex.get_url()
        if url == None:
            # nothing shared as a URL — pull the first URL out of the shared text
            text = appex.get_text()
            url = [ mgroups[0] for mgroups in GRUBER_URLINTEXT_PAT.findall(text) ][0]
    else:
        text = clipboard.get().strip()
        url = [ mgroups[0] for mgroups in GRUBER_URLINTEXT_PAT.findall(text) ][0]
        if not "http" in url:
            # seed the prompt with a scheme when the clipboard held no usable URL
            url = "http://"
        try:
            url = console.input_alert("URL", "", url)
        except:
            # user cancelled the prompt
            return True
    console.hud_alert('URL: %s' % url)
    h = html2text.HTML2Text()
    try:
        r = requests.get(
            url=url,
            headers={"User-agent": "Mozilla/5.0{0:06}".format(random.randrange(999999))}
        )
    except Exception as e:
        # NOTE(review): raise(e.message) raises a string (invalid on Python 3,
        # deprecated on Python 2) and makes the following return unreachable —
        # likely intended to be `raise` or an alert + return.
        raise(e.message)
        return True
    # NOTE(review): .decode on response text is Python-2 era — confirm runtime
    html_content = r.text.decode('utf-8')
    rendered_content = html2text.html2text(html_content)
    clipboard.set(rendered_content)
    launch_e = console.alert('Markdown copied to clipboard. Launch Evernote?', button1='Yes', button2='No', hide_cancel_button=True)
    if launch_e ==1:
        # open Evernote via its x-callback-url, creating a note from the clipboard
        _eurl = "evernote://x-callback-url/new-note?type=clipboard&title=DRAFT&text="
        app=UIApplication.sharedApplication()
        eurl=nsurl(_eurl)
        app.openURL_(eurl)
    appex.finish()
def main():
    """Download a GameFAQs guide (print view) as Markdown into ~/Documents."""
    if appex.is_running_extension():
        url = appex.get_url()
    else:
        url = clipboard.get().strip()
    if not RE_URL.match(url):
        # prompt the user when neither source held a valid gamefaqs URL
        try:
            url = console.input_alert("Enter gamefaqs URL", "", "https://www.gamefaqs.com/")
        except KeyboardInterrupt:
            sys.exit(0)
    newurl = "{0}?print=1".format(url)
    if RE_URL.match(url):
        h = html2text.HTML2Text()
        resp = requests.get(
            url=newurl,
            headers={"User-agent": "Mozilla/5.0{0:06}".format(random.randrange(999999))}
        )
        # NOTE(review): .decode on response text is Python-2 era — confirm runtime
        markdown = html2text.html2text(resp.text.decode('utf-8'))
        # derive a filename from the path segment between the site root and /faqs
        filename = url.partition("gamefaqs.com/")[-1].partition("/")[-1].partition("/faqs")[0]+".txt"
        filepath = os.path.join(os.path.expanduser("~/Documents"), filename)
        with open(filepath, "w") as out:
            out.write(markdown)
        console.hud_alert('Success! Saved {0}'.format(filename), "success")
def get_site_text(url):
    """Fetch *url* and return its body converted from HTML to text.

    Raises requests.HTTPError for non-2xx responses.
    """
    response = requests.get(url)
    response.raise_for_status()
    return html2text.html2text(response.text)
# 2: Score each word for an individual page against the full set of pages
def get_site_text(url):
    """Download *url* (raising on HTTP errors) and convert the HTML to text."""
    page = requests.get(url)
    page.raise_for_status()
    markup = page.text
    return html2text.html2text(markup)
#2: Import stopwords from an external file
def get_plain_text(self):
    """Return a PlainText with each HTML field converted to stripped text."""
    def _to_text(value):
        # one conversion path for every field: coerce, html2text, trim trailing ws
        return html2text(smart_str(value)).rstrip()
    return PlainText(action=_to_text(self.action),
                     setup=_to_text(self.setup),
                     effect=_to_text(self.effect),
                     breakdown=_to_text(self.breakdown))
def process(input, entities):
    """Look up a book on Goodreads and build a Messenger button template.

    :param input: original user input, echoed back in the output dict
    :param entities: NLP entities; entities['book'][0]['value'] is the title
    :return: dict with 'success' plus either 'input'/'output' or 'error_msg'
    """
    output = {}
    try:
        book_title = entities['book'][0]['value']
        # cache Goodreads responses for a day to avoid hammering the API
        with requests_cache.enabled('book_cache', backend='sqlite', expire_after=86400):
            response = requests.get('https://www.goodreads.com/book/title.xml?key=' + GOODREADS_ACCESS_TOKEN + '&title=' + book_title)
        data = ElementTree.fromstring(response.content)
        book_node = data.find('book')
        author = book_node.find('authors').find('author').find('name').text
        title = book_node.find('title').text
        # description is HTML; flatten to plain text
        description = html2text(book_node.find('description').text)
        average_rating = book_node.find('average_rating').text
        link = book_node.find('link').text
        goodreads_attribution = '- Powered by Goodreads'
        template = TextTemplate()
        template.set_text('Title: ' + title + '\nAuthor: ' + author + '\nDescription: ' + description)
        template.set_post_text('\nAverage Rating: ' + average_rating + ' / 5' + '\n' + goodreads_attribution)
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('Goodreads Link', link)
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; any lookup/parse failure yields a friendly error
        error_message = 'I couldn\'t find any book matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n - book timeline'
        error_message += '\n - harry potter book plot'
        error_message += '\n - little women book rating'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output