def decode(self, text, encoding=None, normalization=None):
    """Return ``text`` as normalised Unicode.

    If ``encoding`` and/or ``normalization`` is ``None`` (or otherwise
    falsy), the ``input_encoding`` and ``normalization`` parameters
    passed to :class:`Workflow` are used.

    :param text: string to decode and normalise
    :type text: encoded or Unicode string. If ``text`` is already a
        Unicode string, it will only be normalised.
    :param encoding: The text encoding to use to decode ``text`` to
        Unicode.
    :type encoding: string or ``None``
    :param normalization: The normalisation form to apply to ``text``
        (passed straight to :func:`unicodedata.normalize`).
    :type normalization: string or ``None``
    :returns: decoded and normalised Unicode string

    :class:`Workflow` uses "NFC" normalisation by default. This is the
    standard for Python and will work well with data from the web (via
    :mod:`~workflow.web` or :mod:`json`).

    OS X, on the other hand, uses "NFD" normalisation (nearly), so data
    coming from the system (e.g. via :mod:`subprocess` or
    :func:`os.listdir`/:mod:`os.path`) may not match. You should either
    normalise this data, too, or change the default normalisation used by
    :class:`Workflow`.

    """
    # The Unicode text type is ``unicode`` on Python 2 and ``str`` on
    # Python 3; resolve it at call time so the method runs on both.
    try:
        text_type = unicode  # noqa: F821 -- only defined on Python 2
    except NameError:
        text_type = str

    encoding = encoding or self._input_encoding
    # NOTE(review): ``_normalizsation`` looks misspelled, but the attribute
    # is defined outside this block, so the name is kept as-is -- confirm
    # against the class initialiser before renaming.
    normalization = normalization or self._normalizsation

    # Only encoded (byte) strings need decoding; Unicode text is passed
    # straight through to normalisation.
    if not isinstance(text, text_type):
        text = text_type(text, encoding)

    return unicodedata.normalize(normalization, text)
评论列表
文章目录