def _apply(self, value):
    """
    Decodes a unicode string using ``self.decoder``.

    The value is first checked against ``Type(text_type)``; if that
    check records an error, ``None`` is returned without attempting
    to decode.

    Otherwise the decoder is invoked with
    ``object_pairs_hook=OrderedDict``.  A ``ValueError`` raised by
    the decoder is reported through ``_invalid_value`` (with
    ``CODE_INVALID``), and that method's result is returned instead.
    """
    value = self._filter(value, Type(text_type))  # type: Text

    if self._has_errors:
        return None

    try:
        # :see: http://stackoverflow.com/a/6921760
        decoded = self.decoder(value, object_pairs_hook=OrderedDict)
    except ValueError:
        # ``exc_info=True`` is passed from inside the handler so the
        # active exception is still available to ``_invalid_value``.
        return self._invalid_value(value, self.CODE_INVALID, exc_info=True)
    else:
        return decoded
Example source code for the Python class Text()
def __init__(self, max_bytes, truncate=True, prefix='', encoding='utf-8'):
    # type: (int, bool, Text, Text) -> None
    """
    Stores the configuration for a byte-length limit.

    :param max_bytes:
        Maximum number of bytes that a value may occupy.

    :param truncate:
        If ``True``, values that are too long get truncated.
        Pass ``False`` to save system resources when over-long
        values are going to be rejected anyway.

    :param prefix:
        Text applied as a prefix to truncated values.
        Has no effect when ``truncate`` is ``False``.

    :param encoding:
        Character encoding that the byte length is checked against.
        Note: This filter is optimized for UTF-8.
    """
    super(MaxBytes, self).__init__()

    self.max_bytes = max_bytes
    self.truncate = truncate
    self.prefix = prefix
    self.encoding = encoding
def __init__(self, leading=r'[\p{C}\s]+', trailing=r'[\p{C}\s]+'):
    # type: (Text, Text) -> None
    """
    Compiles the patterns that are matched at each end of a string.

    :param leading:
        Regex to match at the start of the string.
        A falsy value sets ``self.leading`` to ``None``.

    :param trailing:
        Regex to match at the end of the string.
        A falsy value sets ``self.trailing`` to ``None``.
    """
    super(Strip, self).__init__()

    #
    # Each pattern is wrapped in a non-capturing group before the
    # anchor is attached.  Without the group, a pattern containing
    # top-level alternation (e.g. ``a|b``) would only have one of its
    # branches anchored (``^a|b`` means "``a`` at the start, or ``b``
    # anywhere").  The group does not capture, so numbered
    # backreferences inside the caller's pattern are unaffected.
    #
    if leading:
        self.leading = regex.compile(
            r'^(?:{pattern})'.format(pattern=leading),
            regex.UNICODE,
        )
    else:
        self.leading = None

    if trailing:
        self.trailing = regex.compile(
            r'(?:{pattern})$'.format(pattern=trailing),
            regex.UNICODE,
        )
    else:
        self.trailing = None
def __init__(self, encoding='utf-8', normalize=True):
    # type: (Text, bool) -> None
    r"""
    Stores the decoding options and, when normalization is enabled,
    prepares the regex used to strip non-printable characters.

    :param encoding:
        Used to decode non-unicode values.

    :param normalize:
        Whether to normalize the resulting value:
            - Convert to NFC form.
            - Remove non-printable characters.
            - Convert all line endings to unix-style ('\n').
    """
    super(Unicode, self).__init__()

    self.encoding = encoding
    self.normalize = normalize

    # ``self.npr`` is only defined when normalization is enabled.
    if not self.normalize:
        return

    #
    # Regex for removing non-printables from the resulting unicode.
    # http://www.regular-expressions.info/unicode.html#category
    #
    # The double negative ("not (non-C or whitespace)") lets us
    # exclude newlines, which are technically considered control
    # chars.
    # http://stackoverflow.com/a/3469155
    #
    self.npr = regex.compile(r'[^\P{C}\s]+', regex.UNICODE)
def _apply(self, value):
    """
    Runs the superclass filter, then encodes its result as UTF-8.

    If the superclass recorded any errors, its return value is
    passed through untouched.
    """
    decoded = super(ByteString, self)._apply(value)  # type: Text

    #
    # Normally we return ``None`` if we get any errors, but in this
    # case, we'll let the superclass method decide.
    #
    if self._has_errors:
        return decoded

    #
    # UnicodeEncodeErrors are deliberately not caught here; UTF-8 can
    # handle any unicode value.
    #
    # The only way to get that error would be a code point beyond
    # U+10FFFF (the highest valid code point in the Unicode
    # standard), but a `unicode` object cannot be created with an
    # invalid code point, so such a value would never get this far.
    #
    # Some versions of Python can be tricked (with great difficulty)
    # into creating unicode objects with invalid code points, but
    # that generally requires specific codecs that aren't UTF-8.
    #
    # Example of exploit and release notes from the Python release
    # (2.7.6) that fixes the issue:
    #
    # - https://gist.github.com/rspeer/7559750
    # - https://hg.python.org/cpython/raw-file/99d03261c1ba/Misc/NEWS
    #
    return decoded.encode('utf-8')
def __init__(self, region: Text, key: Text):
    """Remember the service region and the key used for authentication.

    :param region: Region name; substituted into the API host name.
    :param key: Secret sent as the value of the authentication header.
    """
    self._key = key
    self._region = region
def _format_api_base(self) -> Text:
    """Return the scheme and host of the API for the configured region."""
    template = 'https://{region}.api.cognitive.microsoft.com'
    return template.format(region=self._region)
def _format_headers(self, kv: Iterable[Header]) -> Dict[Text, Text]:
    """Build the request headers: the auth header plus the given pairs.

    :param kv: Iterable of ``(name, value)`` pairs. A pair whose name
        equals the auth header name overrides the auth entry, since the
        pairs are applied after it.
    :return: A new dict mapping header names to values.
    """
    merged = {self._auth_keyname: self._key}
    merged.update((name, content) for name, content in kv)
    return merged
def _get_json(self, url: Text, **kwargs) -> Dict:
    """Delegate to ``_make_json_request`` with the ``'get'`` method.

    :param url: Passed through as the request URL.
    :param kwargs: Forwarded unchanged to ``_make_json_request``.
    :return: Whatever ``_make_json_request`` returns.
    """
    http_method = 'get'
    return self._make_json_request(http_method, url, **kwargs)
def _post_json(self, url: Text, **kwargs) -> Dict:
    """Delegate to ``_make_json_request`` with the ``'post'`` method.

    :param url: Passed through as the request URL.
    :param kwargs: Forwarded unchanged to ``_make_json_request``.
    :return: Whatever ``_make_json_request`` returns.
    """
    http_method = 'post'
    return self._make_json_request(http_method, url, **kwargs)
@property
def _auth_keyname(self) -> Text:
    """Name of the HTTP header that carries the API key.

    Exposed as a property because ``_format_headers`` reads it as an
    attribute (``self._auth_keyname``) and uses it as a dict key. As a
    plain method, that read would yield a bound method object and the
    ``NotImplementedError`` below would never be raised.

    Subclasses are expected to override this (as a property or a class
    attribute) with the actual header name.

    :raises NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
def _format_projects_endpoint(self) -> Text:
    """Return the URL of the projects collection under the API base."""
    api_base = self._format_api_base()
    template = '{base}/customvision/v1.0/Training/projects'
    return template.format(base=api_base)
def _format_new_project_endpoint(self, project_name: Text) -> Text:
    """Return the URL used to create a project named ``project_name``.

    The query string carries the project name plus fixed settings: an
    empty description, the ``MultiLabel`` classifier and
    ``useNegativeSet=true``.

    Every key and value is percent-encoded, so a name containing spaces
    or reserved characters (``&``, ``=``, ``#``, ...) cannot break out
    of its parameter or truncate the URL.

    :param project_name: Name for the new project.
    :return: The projects endpoint followed by the encoded query string.
    """
    from urllib.parse import quote, urlencode

    query = (('name', project_name),
             ('description', ''),
             ('classifier', 'MultiLabel'),
             ('useNegativeSet', 'true'))

    # quote_via=quote encodes spaces as %20 rather than '+'.
    return '{base}?{query}'.format(
        base=self._format_projects_endpoint(),
        query=urlencode(query, quote_via=quote))
def _format_project_endpoint(self, project_id: Text) -> Text:
    """Return the URL of one project: the projects endpoint plus its id.

    :param project_id: Identifier appended as the final path segment.
    """
    projects_url = self._format_projects_endpoint()
    template = '{base}/{project_id}'
    return template.format(base=projects_url, project_id=project_id)
def _format_tags_endpoint(self, project_id: Text) -> Text:
    """Return the URL of the ``tags`` resource of the given project.

    :param project_id: Identifier of the project owning the tags.
    """
    project_url = self._format_project_endpoint(project_id)
    template = '{base}/tags'
    return template.format(base=project_url)
def _format_training_endpoint(self, project_id: Text) -> Text:
    """Return the URL of the ``train`` resource of the given project.

    :param project_id: Identifier of the project to train.
    """
    project_url = self._format_project_endpoint(project_id)
    template = '{base}/train'
    return template.format(base=project_url)
def _format_image_url(self, project_id: Text, tags: Iterable[Tag]) -> Text:
    """Return the image URL of a project, with one ``tagIds`` per tag.

    :param project_id: Identifier of the project.
    :param tags: Objects whose ``Id`` attribute is placed in the query
        string. With no tags the query is a bare ``tagIds=``.
    """
    # Resolve the base first, then consume ``tags``, mirroring the
    # order in which the format arguments were originally evaluated.
    project_url = self._format_project_endpoint(project_id)
    joined_ids = '&tagIds='.join([tag.Id for tag in tags])
    template = '{base}/images/image?tagIds={tagIds}'
    return template.format(base=project_url, tagIds=joined_ids)
def _fetch_project_tags(self, project_id: Text) -> Iterable[Tag]:
    """Fetch the tags of a project and convert each one with ``create``.

    :param project_id: Identifier of the project whose tags are read.
    :return: A list with one ``create(Tag, ...)`` result per entry under
        the ``'Tags'`` key of the JSON response.
    """
    tags_url = self._format_tags_endpoint(project_id)
    payload = self._get_json(tags_url)
    return [create(Tag, raw_tag) for raw_tag in payload['Tags']]
def _fetch_tags_for_names(self, project_id: Text,
                          names: Iterable[Text]) -> Iterable[Tag]:
    """Look up the project's tags for the given tag names.

    :param project_id: Identifier of the project whose tags are fetched.
    :param names: Tag names to resolve, in the desired output order.
    :return: A list of tags, one per entry of ``names``.
    :raises KeyError: If a name does not match any tag of the project.
    """
    # Index by name; a later tag with the same name replaces an
    # earlier one.
    tags_by_name = {}
    for tag in self._fetch_project_tags(project_id):
        tags_by_name[tag.Name] = tag

    return [tags_by_name[name] for name in names]
def create_project(self, project_name: Text) -> Project:
    """Create a project via the API and return it as a ``Project``.

    Posts to the new-project URL with an explicit ``Content-Length: 0``
    header and converts the JSON response with ``create``.

    :param project_name: Name for the new project.
    :return: The result of ``create(Project, response)``.
    """
    endpoint = self._format_new_project_endpoint(project_name)
    empty_body_headers = [('Content-Length', '0')]
    payload = self._post_json(endpoint, headers=empty_body_headers)
    return create(Project, payload)