def get_mailbox(value):
""" mailbox = name-addr / addr-spec
"""
# The only way to figure out if we are dealing with a name-addr or an
# addr-spec is to try parsing each one.
mailbox = Mailbox()
try:
token, value = get_name_addr(value)
except errors.HeaderParseError:
try:
token, value = get_addr_spec(value)
except errors.HeaderParseError:
raise errors.HeaderParseError(
"expected mailbox but found '{}'".format(value))
if any(isinstance(x, errors.InvalidHeaderDefect)
for x in token.all_defects):
mailbox.token_type = 'invalid-mailbox'
mailbox.append(token)
return mailbox, value
python类HeaderParseError()的实例源码
def get_ttext(value):
"""ttext = <matches _ttext_matcher>
We allow any non-TOKEN_ENDS in ttext, but add defects to the token's
defects list if we find non-ttext characters. We also register defects for
*any* non-printables even though the RFC doesn't exclude all of them,
because we follow the spirit of RFC 5322.
"""
m = _non_token_end_matcher(value)
if not m:
raise errors.HeaderParseError(
"expected ttext but found '{}'".format(value))
ttext = m.group()
value = value[len(ttext):]
ttext = ValueTerminal(ttext, 'ttext')
_validate_xtext(ttext)
return ttext, value
def get_token(value):
"""token = [CFWS] 1*ttext [CFWS]
The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or
tspecials. We also exclude tabs even though the RFC doesn't.
The RFC implies the CFWS but is not explicit about it in the BNF.
"""
mtoken = Token()
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
mtoken.append(token)
if value and value[0] in TOKEN_ENDS:
raise errors.HeaderParseError(
"expected token but found '{}'".format(value))
token, value = get_ttext(value)
mtoken.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
mtoken.append(token)
return mtoken, value
def get_attrtext(value):
"""attrtext = 1*(any non-ATTRIBUTE_ENDS character)
We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the
token's defects list if we find non-attrtext characters. We also register
defects for *any* non-printables even though the RFC doesn't exclude all of
them, because we follow the spirit of RFC 5322.
"""
m = _non_attribute_end_matcher(value)
if not m:
raise errors.HeaderParseError(
"expected attrtext but found {!r}".format(value))
attrtext = m.group()
value = value[len(attrtext):]
attrtext = ValueTerminal(attrtext, 'attrtext')
_validate_xtext(attrtext)
return attrtext, value
def get_attribute(value):
""" [CFWS] 1*attrtext [CFWS]
This version of the BNF makes the CFWS explicit, and as usual we use a
value terminal for the actual run of characters. The RFC equivalent of
attrtext is the token characters, with the subtraction of '*', "'", and '%'.
We include tab in the excluded set just as we do for token.
"""
attribute = Attribute()
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
attribute.append(token)
if value and value[0] in ATTRIBUTE_ENDS:
raise errors.HeaderParseError(
"expected token but found '{}'".format(value))
token, value = get_attrtext(value)
attribute.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
attribute.append(token)
return attribute, value
def get_extended_attrtext(value):
"""attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')
This is a special parsing routine so that we get a value that
includes % escapes as a single string (which we decode as a single
string later).
"""
m = _non_extended_attribute_end_matcher(value)
if not m:
raise errors.HeaderParseError(
"expected extended attrtext but found {!r}".format(value))
attrtext = m.group()
value = value[len(attrtext):]
attrtext = ValueTerminal(attrtext, 'extended-attrtext')
_validate_xtext(attrtext)
return attrtext, value
def get_value(value):
""" quoted-string / attribute
"""
v = Value()
if not value:
raise errors.HeaderParseError("Expected value but found end of string")
leader = None
if value[0] in CFWS_LEADER:
leader, value = get_cfws(value)
if not value:
raise errors.HeaderParseError("Expected value but found "
"only {}".format(leader))
if value[0] == '"':
token, value = get_quoted_string(value)
else:
token, value = get_extended_attribute(value)
if leader is not None:
token[:0] = [leader]
v.append(token)
return v, value
def test_set_boundary(self):
eq = self.assertEqual
# This one has no existing boundary parameter, but the Content-Type:
# header appears fifth.
msg = self._msgobj('msg_01.txt')
msg.set_boundary('BOUNDARY')
header, value = msg.items()[4]
eq(header.lower(), 'content-type')
eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
# This one has a Content-Type: header, with a boundary, stuck in the
# middle of its headers. Make sure the order is preserved; it should
# be fifth.
msg = self._msgobj('msg_04.txt')
msg.set_boundary('BOUNDARY')
header, value = msg.items()[4]
eq(header.lower(), 'content-type')
eq(value, 'multipart/mixed; boundary="BOUNDARY"')
# And this one has no Content-Type: header at all.
msg = self._msgobj('msg_03.txt')
self.assertRaises(errors.HeaderParseError,
msg.set_boundary, 'BOUNDARY')
def parse_docstring(docstring):
"""
Parse out the parts of a docstring. Return (title, body, metadata).
"""
docstring = trim_docstring(docstring)
parts = re.split(r'\n{2,}', docstring)
title = parts[0]
if len(parts) == 1:
body = ''
metadata = {}
else:
parser = HeaderParser()
try:
metadata = parser.parsestr(parts[-1])
except HeaderParseError:
metadata = {}
body = "\n\n".join(parts[1:])
else:
metadata = dict(metadata.items())
if metadata:
body = "\n\n".join(parts[1:-1])
else:
body = "\n\n".join(parts[1:])
return title, body, metadata
def parse_docstring(docstring):
"""
Parse out the parts of a docstring. Return (title, body, metadata).
"""
docstring = trim_docstring(docstring)
parts = re.split(r'\n{2,}', docstring)
title = parts[0]
if len(parts) == 1:
body = ''
metadata = {}
else:
parser = HeaderParser()
try:
metadata = parser.parsestr(parts[-1])
except HeaderParseError:
metadata = {}
body = "\n\n".join(parts[1:])
else:
metadata = dict(metadata.items())
if metadata:
body = "\n\n".join(parts[1:-1])
else:
body = "\n\n".join(parts[1:])
return title, body, metadata
def parse_docstring(docstring):
"""
Parse out the parts of a docstring. Return (title, body, metadata).
"""
docstring = trim_docstring(docstring)
parts = re.split(r'\n{2,}', docstring)
title = parts[0]
if len(parts) == 1:
body = ''
metadata = {}
else:
parser = HeaderParser()
try:
metadata = parser.parsestr(parts[-1])
except HeaderParseError:
metadata = {}
body = "\n\n".join(parts[1:])
else:
metadata = dict(metadata.items())
if metadata:
body = "\n\n".join(parts[1:-1])
else:
body = "\n\n".join(parts[1:])
return title, body, metadata
def parse_docstring(docstring):
"""
Parse out the parts of a docstring. Return (title, body, metadata).
"""
docstring = trim_docstring(docstring)
parts = re.split(r'\n{2,}', docstring)
title = parts[0]
if len(parts) == 1:
body = ''
metadata = {}
else:
parser = HeaderParser()
try:
metadata = parser.parsestr(parts[-1])
except HeaderParseError:
metadata = {}
body = "\n\n".join(parts[1:])
else:
metadata = dict(metadata.items())
if metadata:
body = "\n\n".join(parts[1:-1])
else:
body = "\n\n".join(parts[1:])
return title, body, metadata
def smtp_VRFY(self, arg):
if arg:
try:
address, params = self._getaddr(arg)
except HeaderParseError:
address = None
if address is None:
await self.push('502 Could not VRFY %s' % arg)
else:
status = await self._call_handler_hook('VRFY', address)
await self.push(
'252 Cannot VRFY user, but will accept message '
'and attempt delivery'
if status is MISSING else status)
else:
await self.push('501 Syntax: VRFY <address>')
def parse_docstring(docstring):
"""
Parse out the parts of a docstring. Return (title, body, metadata).
"""
docstring = trim_docstring(docstring)
parts = re.split(r'\n{2,}', docstring)
title = parts[0]
if len(parts) == 1:
body = ''
metadata = {}
else:
parser = HeaderParser()
try:
metadata = parser.parsestr(parts[-1])
except HeaderParseError:
metadata = {}
body = "\n\n".join(parts[1:])
else:
metadata = dict(metadata.items())
if metadata:
body = "\n\n".join(parts[1:-1])
else:
body = "\n\n".join(parts[1:])
return title, body, metadata
def parse_docstring(docstring):
"""
Parse out the parts of a docstring. Returns (title, body, metadata).
"""
docstring = trim_docstring(docstring)
parts = re.split(r'\n{2,}', docstring)
title = parts[0]
if len(parts) == 1:
body = ''
metadata = {}
else:
parser = HeaderParser()
try:
metadata = parser.parsestr(parts[-1])
except HeaderParseError:
metadata = {}
body = "\n\n".join(parts[1:])
else:
metadata = dict(metadata.items())
if metadata:
body = "\n\n".join(parts[1:-1])
else:
body = "\n\n".join(parts[1:])
return title, body, metadata
def encode(self, splitchars=';, '):
"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
an email header. Only certain character sets are readable in most
email clients, and as header strings can only contain a subset of
7-bit ASCII, care must be taken to properly convert and encode (with
Base64 or quoted-printable) header strings. In addition, there is a
75-character length limit on any given encoded header field, so
line-wrapping must be performed, even with double-byte character sets.
This method will do its best to convert the string to the correct
character set used in email, and encode and line wrap it safely with
the appropriate scheme for that character set.
If the given charset is not known or an error occurs during
conversion, this function will return the header untouched.
Optional splitchars is a string containing characters to split long
ASCII lines on, in rough support of RFC 2822's `highest level
syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
"""
newchunks = []
maxlinelen = self._firstlinelen
lastlen = 0
for s, charset in self._chunks:
# The first bit of the next chunk should be just long enough to
# fill the next line. Don't forget the space separating the
# encoded words.
targetlen = maxlinelen - lastlen - 1
if targetlen < charset.encoded_header_len(''):
# Stick it on the next line
targetlen = maxlinelen
newchunks += self._split(s, charset, targetlen, splitchars)
lastchunk, lastcharset = newchunks[-1]
lastlen = lastcharset.encoded_header_len(lastchunk)
value = self._encode_chunks(newchunks, maxlinelen)
if _embeded_header.search(value):
raise HeaderParseError("header value appears to contain "
"an embedded header: {!r}".format(value))
return value
def test_broken_base64_header(self):
raises = self.assertRaises
s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?='
raises(errors.HeaderParseError, decode_header, s)
# Test RFC 2231 header parameters (en/de)coding
def encode(self, splitchars=';, '):
"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
an email header. Only certain character sets are readable in most
email clients, and as header strings can only contain a subset of
7-bit ASCII, care must be taken to properly convert and encode (with
Base64 or quoted-printable) header strings. In addition, there is a
75-character length limit on any given encoded header field, so
line-wrapping must be performed, even with double-byte character sets.
This method will do its best to convert the string to the correct
character set used in email, and encode and line wrap it safely with
the appropriate scheme for that character set.
If the given charset is not known or an error occurs during
conversion, this function will return the header untouched.
Optional splitchars is a string containing characters to split long
ASCII lines on, in rough support of RFC 2822's `highest level
syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
"""
newchunks = []
maxlinelen = self._firstlinelen
lastlen = 0
for s, charset in self._chunks:
# The first bit of the next chunk should be just long enough to
# fill the next line. Don't forget the space separating the
# encoded words.
targetlen = maxlinelen - lastlen - 1
if targetlen < charset.encoded_header_len(''):
# Stick it on the next line
targetlen = maxlinelen
newchunks += self._split(s, charset, targetlen, splitchars)
lastchunk, lastcharset = newchunks[-1]
lastlen = lastcharset.encoded_header_len(lastchunk)
value = self._encode_chunks(newchunks, maxlinelen)
if _embeded_header.search(value):
raise HeaderParseError("header value appears to contain "
"an embedded header: {!r}".format(value))
return value
def test_broken_base64_header(self):
raises = self.assertRaises
s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
raises(errors.HeaderParseError, decode_header, s)
# Test RFC 2231 header parameters (en/de)coding
def encode(self, splitchars=';, '):
"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
an email header. Only certain character sets are readable in most
email clients, and as header strings can only contain a subset of
7-bit ASCII, care must be taken to properly convert and encode (with
Base64 or quoted-printable) header strings. In addition, there is a
75-character length limit on any given encoded header field, so
line-wrapping must be performed, even with double-byte character sets.
This method will do its best to convert the string to the correct
character set used in email, and encode and line wrap it safely with
the appropriate scheme for that character set.
If the given charset is not known or an error occurs during
conversion, this function will return the header untouched.
Optional splitchars is a string containing characters to split long
ASCII lines on, in rough support of RFC 2822's `highest level
syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
"""
newchunks = []
maxlinelen = self._firstlinelen
lastlen = 0
for s, charset in self._chunks:
# The first bit of the next chunk should be just long enough to
# fill the next line. Don't forget the space separating the
# encoded words.
targetlen = maxlinelen - lastlen - 1
if targetlen < charset.encoded_header_len(''):
# Stick it on the next line
targetlen = maxlinelen
newchunks += self._split(s, charset, targetlen, splitchars)
lastchunk, lastcharset = newchunks[-1]
lastlen = lastcharset.encoded_header_len(lastchunk)
value = self._encode_chunks(newchunks, maxlinelen)
if _embeded_header.search(value):
raise HeaderParseError("header value appears to contain "
"an embedded header: {!r}".format(value))
return value