def test_decode_header(self):
for p, e in self.HSTRINGS:
self.assertEqual(quopri.decodestring(e, header=True), p)
# Example source code for Python's decodestring() function
def printBody(self, message, body, cstr):
    """Write the decoded ``body`` of a mail message to ``cstr``.

    If the ``Content-Transfer-Encoding`` header of ``message`` is ``base64``
    (compared case-insensitively), ``body`` is base64-decoded first; when
    that fails a warning line is written and the undecoded body is used.
    The result is then always run through ``quopri.decodestring`` and
    written to ``cstr`` followed by a newline.

    Args:
        message: object offering a mapping-style ``get`` for header lookup.
        body: the raw message body.
        cstr: output object with a ``write`` method.
    """
    encoding = message.get('Content-Transfer-Encoding')
    if encoding is not None and str(encoding).strip().lower() == 'base64':
        try:
            body = base64.b64decode(body)
        except (TypeError, ValueError):
            # Python 2 signals bad input with TypeError, Python 3 with
            # binascii.Error, which is a ValueError subclass.
            cstr.write('WARNING - could not decode base64' + '\n')
    # pj suggested improvement by vragosta to get rid of occasional " =20"
    # at end of lines.
    decoded = quopri.decodestring(body)
    if not isinstance(decoded, str):
        # On Python 3 decodestring returns bytes, which cannot be joined
        # with '\n'.  NOTE(review): UTF-8 is an assumption; the message
        # charset is not consulted here.
        decoded = decoded.decode('utf-8', 'replace')
    cstr.write(decoded + '\n')
# -------------------------------------------------
# Get and print to STDOUT the mail message
# -------------------------------------------------
def test_decodestring(self):
for p, e in self.STRINGS:
self.assertTrue(quopri.decodestring(e) == p)
def test_idempotent_string(self):
for p, e in self.STRINGS:
self.assertTrue(quopri.decodestring(quopri.encodestring(e)) == e)
def test_embedded_ws(self):
for p, e in self.ESTRINGS:
self.assertTrue(quopri.encodestring(p, quotetabs=True) == e)
self.assertTrue(quopri.decodestring(e) == p)
def test_decode_header(self):
for p, e in self.HSTRINGS:
self.assertTrue(quopri.decodestring(e, header=True) == p)
def test_decodestring(self):
for p, e in self.STRINGS:
self.assertEqual(quopri.decodestring(e), p)
def test_idempotent_string(self):
for p, e in self.STRINGS:
self.assertEqual(quopri.decodestring(quopri.encodestring(e)), e)
def test_embedded_ws(self):
for p, e in self.ESTRINGS:
self.assertEqual(quopri.encodestring(p, quotetabs=True), e)
self.assertEqual(quopri.decodestring(e), p)
def test_decode_header(self):
for p, e in self.HSTRINGS:
self.assertEqual(quopri.decodestring(e, header=True), p)
def _infer_text_fragment_inner(self, title, body, post_id):
    """Locate this extract's quote in a post and record where it was found.

    The quote (``self.quote``) is stripped of carriage returns, passed
    through ``quopri.decodestring`` on a best-effort basis, and sanitized.
    ``self.body`` is updated to the sanitized quote when it differs.  The
    quote, with newlines removed, is then searched for in the sanitized
    ``body`` and, failing that, in ``title``.

    Args:
        title: text of the post title, searched when the body has no match.
        body: text of the post body; sanitized before searching.
        post_id: identifier passed to ``Post.uri_generic`` to build the xpath.

    Returns:
        ``None`` when the quote is found in neither body nor title.
        Otherwise the ``TextFragmentIdentifier`` for this extract (the first
        existing one from the database, or a new one) with its xpath and
        start/end offsets set.
    """
    # dead code? If not needs to be refactored with langstrings
    # and moved within text_fragment, maybe?
    body = sanitize_html(body, [])
    quote = self.quote.replace("\r", "")
    try:
        # for historical reasons
        quote = quopri.decodestring(quote)
    except Exception:
        # Best effort: keep the undecoded quote.  Not a bare ``except`` so
        # that KeyboardInterrupt and SystemExit still propagate.
        pass
    quote = sanitize_html(quote, [])
    if quote != self.body:
        self.body = quote
    quote = quote.replace("\n", "")
    start = body.find(quote)
    lookin = 'message-body'
    if start < 0:
        # Not in the body: fall back to the title.  (An xpath used to be
        # assigned here as well, but it was always overwritten below.)
        start = title.find(quote)
        if start < 0:
            return None
        lookin = 'message-subject'
    xpath = "//div[@id='message-%s']//div[@class='%s']" % (
        Post.uri_generic(post_id), lookin)
    tfi = self.db.query(TextFragmentIdentifier).filter_by(
        extract=self).first()
    if not tfi:
        tfi = TextFragmentIdentifier(extract=self)
    tfi.xpath_start = tfi.xpath_end = xpath
    tfi.offset_start = start
    tfi.offset_end = start + len(quote)
    return tfi
def __init__(self, filename):
    """Initialise the patch from the first message of an mbox file.

    Reads the message id, subject, date, payload and sender of the first
    mail in ``filename``, splits the payload with ``parse_payload`` and
    hands the result to the parent constructor.

    Args:
        filename: path of an existing mbox file (it is not created).

    Raises:
        Whatever ``parse_payload`` raises for a payload it cannot handle.
    """
    mail = mailbox.mbox(filename, create=False)[0]

    # Simply name it commit_hash, otherwise we would have to refactor
    # tons of code.
    self.commit_hash = mail['Message-ID']
    self.mail_subject = mail['Subject']

    # We need timezone-aware datetimes because most emails carry
    # timezone-aware timestamps, and naive timestamps cannot be compared
    # with aware ones.  To cope with that, naive dates are simply treated
    # as UTC (which is also true in most cases).
    #
    # E.g. python converts this timestamp to a timezone-unaware one,
    # while it is GMT:
    # 'Fri, 23 Feb 2007 13:35:50 -0000 (GMT)'
    try:
        date = email.utils.parsedate_to_datetime(mail['Date'])
    except Exception:
        # Missing or malformed Date header: assume epoch.  Not a bare
        # ``except`` so that KeyboardInterrupt/SystemExit still propagate.
        log.debug('  Message %s: unable to parse date %s' %
                  (self.commit_hash, mail['Date']))
        date = datetime.datetime.fromtimestamp(0, datetime.timezone.utc)

    if date.tzinfo is None:
        date = date.replace(tzinfo=datetime.timezone.utc)

    payload = mail.get_payload()

    # Check encoding and decode.  The encoding token is compared
    # case-insensitively, so 'quoted-printable' is recognised as well as
    # 'QUOTED-PRINTABLE'.
    cte = mail['Content-Transfer-Encoding']
    if cte is not None and str(cte).strip().lower() == 'quoted-printable':
        charset = mail.get_content_charset()
        if charset not in CHARSETS:
            charset = 'ascii'
        if isinstance(payload, str):
            # quopri only accepts ASCII-only str; go through bytes so that
            # payloads containing 8-bit characters do not raise.
            payload = payload.encode('utf-8', errors='surrogateescape')
        payload = quopri.decodestring(payload)
        payload = payload.decode(charset, errors='ignore')

    # MAY RAISE AN ERROR, FORBID RETURN NULL
    msg, diff = parse_payload(payload)

    # reconstruct commit message
    subject = self.mail_subject
    match = PATCH_SUBJECT_REGEX.match(self.mail_subject)
    if match:
        subject = match.group(1)
    msg = [subject, ''] + msg

    author_name = mail['From']
    author_email = ''
    match = MAIL_FROM_REGEX.match(author_name)
    if match:
        author_name = match.group(1)
        author_email = match.group(2)

    super(PatchMail, self).__init__(msg, diff, author_name, author_email,
                                    date, snip_header=True)
def import_mail(ctx, tag, category):
    """Import a mail read from stdin as a new document.

    The subject becomes the document title; the payload, passed through
    ``quopri.decodestring``, becomes the content, prefixed with a
    ``# <title>`` heading.  The document is inserted only if it passes
    ``validate``; otherwise an error is logged.

    Args:
        ctx: context object handed to ``db.get_document_collection`` and
            ``transaction.log``.
        tag: iterable of tags stored on the document.
        category: iterable of categories stored on the document.
    """
    # Read stdin in one go instead of concatenating line by line.
    raw_mail = click.get_text_stream('stdin').read()
    msg = email.message_from_string(raw_mail)

    # title
    # NOTE(review): only the first fragment of the decoded subject is used.
    subject, encoding = email.header.decode_header(msg['Subject'])[0]
    if isinstance(subject, bytes):
        # decode_header may return text that is already decoded; only raw
        # bytes need decoding.
        subject = subject.decode(encoding or "utf-8")
    title = subject

    # content
    body = quopri.decodestring(msg.get_payload(decode=False))
    if isinstance(body, bytes):
        # decodestring yields bytes; turn them into text before they are
        # concatenated with the title.
        charset = msg.get_content_charset() or "utf-8"
        try:
            body = body.decode(charset, "replace")
        except LookupError:
            # Unknown charset name in the mail: fall back to UTF-8.
            body = body.decode("utf-8", "replace")
    content = "# " + title + '\n\n' + body

    date = datetime.datetime.now()
    coll = db.get_document_collection(ctx)

    item = {
        "title": title,
        "content": content,
        "tags": list(tag),
        "categories": list(category),
        "created": date,
        "updated": date,
        "encrypted": False,
    }

    # insert item if its valid
    if validate(item):
        docid = coll.insert_one(item).inserted_id

        transaction.log(ctx, str(docid), "import", title)
        utils.log_info("Document \"%s\" created." % title)
    else:
        utils.log_error("Validation of the updated object did not succeed")