python类normalize()的实例源码

workflow.py 文件源码 项目:alfred-mpd 作者: deanishe 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def fold_to_ascii(self, text):
        """Convert non-ASCII characters to closest ASCII equivalent.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
        return unicode(unicodedata.normalize('NFKD',
                       text).encode('ascii', 'ignore'))
util.py 文件源码 项目:aiodownload 作者: jelloslinger 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def clean_filename(filename):
    """Return a sanitized filename (replace / strip out illegal characters)

    :param filename: string used for a filename
    :type filename: str

    :return: sanitized filename
    :rtype: str
    """

    return ''.join([
        c for c in unicodedata.normalize(
            'NFKD',
            ''.join([REPLACEMENT_CHAR.get(c, c) for c in filename])
        )
        if not unicodedata.combining(c) and c in '-_.() {0}{1}'.format(string.ascii_letters, string.digits)
    ])
bottle.py 文件源码 项目:dabdabrevolution 作者: harryparkdotio 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def filename(self):
        """ Name of the file on the client file system, but normalized to ensure
            file system compatibility. An empty filename is returned as 'empty'.

            Only ASCII letters, digits, dashes, underscores and dots are
            allowed in the final filename. Accents are removed, if possible.
            Whitespace is replaced by a single dash. Leading or tailing dots
            or dashes are removed. The filename is limited to 255 characters.
        """
        fname = self.raw_filename
        if not isinstance(fname, unicode):
            fname = fname.decode('utf8', 'ignore')
        fname = normalize('NFKD', fname)
        fname = fname.encode('ASCII', 'ignore').decode('ASCII')
        fname = os.path.basename(fname.replace('\\', os.path.sep))
        fname = re.sub(r'[^a-zA-Z0-9-_.\s]', '', fname).strip()
        fname = re.sub(r'[-\s]+', '-', fname).strip('.-')
        return fname[:255] or 'empty'
workflow.py 文件源码 项目:Gank-Alfred-Workflow 作者: hujiaweibujidao 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def fold_to_ascii(self, text):
        """Convert non-ASCII characters to closest ASCII equivalent.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
        return unicode(unicodedata.normalize('NFKD',
                       text).encode('ascii', 'ignore'))
svn_utils.py 文件源码 项目:Flask_Blog 作者: sugarguo 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def decode_as_string(text, encoding=None):
    """
    Decode the console or file output explicitly using getpreferredencoding.
    The text paraemeter should be a encoded string, if not no decode occurs
    If no encoding is given, getpreferredencoding is used.  If encoding is
    specified, that is used instead.  This would be needed for SVN --xml
    output.  Unicode is explicitly put in composed NFC form.

    --xml should be UTF-8 (SVN Issue 2938) the discussion on the Subversion
    DEV List from 2007 seems to indicate the same.
    """
    #text should be a byte string

    if encoding is None:
        encoding = _console_encoding

    if not isinstance(text, unicode):
        text = text.decode(encoding)

    text = unicodedata.normalize('NFC', text)

    return text
views.py 文件源码 项目:Location_Assistance 作者: KamalAwasthi 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def delete_friends(request):
    current_username = request.POST.get('username')
    current_friendName = request.POST.get('friendUsername')
    ol=[]
    try:
        existingUser = FriendList.objects.get(user__username = current_username)
        user_friends = existingUser.getfoo()
        for c in user_friends:
            c = unicodedata.normalize('NFKD', c).encode('ascii','ignore')
            if(c == current_friendName):
                continue
            ol.append(c)
        existingUser.friendList = json.dumps(ol)
        existingUser.save()
    except:
        ol=[]
    return HttpResponse(json.dumps(ol))
modified_cpplint.py 文件源码 项目:linter 作者: ethz-asl 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  if isinstance(line, unicode):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2
      elif not unicodedata.combining(uc):
        width += 1
    return width
  else:
    return len(line)
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def append_utf8(self, text):
        try:
            from Naked.toolshed.system import file_exists
            if not file_exists(self.filepath):
                raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
            import codecs
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
            with codecs.open(self.filepath, mode='a', encoding="utf_8") as appender:
                appender.write(norm_text)
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
            raise e

    #------------------------------------------------------------------------------
    # [ gzip method (writer) ]
    #   writes data to gzip compressed file
    #   Note: adds .gz extension to filename if user did not specify it in the FileWriter class constructor
    #   Note: uses compresslevel = 6 as default to balance speed and compression level (which in general is not significantly less than 9)
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #               test_file_gzip_utf8_readwrite_explicit_decode
    #------------------------------------------------------------------------------
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def gzip(self, text, compression_level=6):
        try:
            import gzip
            if not self.filepath.endswith(".gz"):
                self.filepath = self.filepath + ".gz"
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
                gzip_writer.write(text)
        except UnicodeEncodeError as ue:
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
            import codecs
            binary_data = codecs.encode(norm_text, "utf_8")
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
                gzip_writer.write(binary_data)
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
            raise e

    #------------------------------------------------------------------------------
    # [ write method ]
    #   Universal text file writer that writes by system default or utf-8 encoded unicode if throws UnicdeEncodeError
    #   Tests: test_IO.py :: test_file_ascii_readwrite, test_file_ascii_readwrite_missing_file,
    #    test_file_utf8_write_raises_unicodeerror
    #------------------------------------------------------------------------------
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def readlines_utf8(self):
        try:
            import codecs
            with codecs.open(self.filepath, encoding='utf-8', mode='r') as uni_reader:
                modified_text_list = []
                for line in uni_reader:
                    import unicodedata
                    norm_line = unicodedata.normalize('NFKD', line) # NKFD normalization of the unicode data before use
                    modified_text_list.append(norm_line)
                return modified_text_list
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
            raise e

    #------------------------------------------------------------------------------
    # [ read_gzip ] (byte string)
    #   reads data from a gzip compressed file
    #   returns the decompressed binary data from the file
    #   Note: if decompressing unicode file, set encoding="utf-8"
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #              test_file_read_gzip_missing_file
    #------------------------------------------------------------------------------
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 49 收藏 0 点赞 0 评论 0
def read_utf8(self):
        try:
            import codecs
            f = codecs.open(self.filepath, encoding='utf_8', mode='r')
        except IOError as ioe:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
            raise ioe
        try:
            textstring = f.read()
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', textstring) # NKFD normalization of the unicode data before returns
            return norm_text
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
            raise e
        finally:
            f.close()
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def append_utf8(self, text):
        try:
            from Naked.toolshed.system import file_exists
            if not file_exists(self.filepath):
                raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
            import codecs
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
            with codecs.open(self.filepath, mode='a', encoding="utf_8") as appender:
                appender.write(norm_text)
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
            raise e

    #------------------------------------------------------------------------------
    # [ gzip method (writer) ]
    #   writes data to gzip compressed file
    #   Note: adds .gz extension to filename if user did not specify it in the FileWriter class constructor
    #   Note: uses compresslevel = 6 as default to balance speed and compression level (which in general is not significantly less than 9)
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #               test_file_gzip_utf8_readwrite_explicit_decode
    #------------------------------------------------------------------------------
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def gzip(self, text, compression_level=6):
        try:
            import gzip
            if not self.filepath.endswith(".gz"):
                self.filepath = self.filepath + ".gz"
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
                gzip_writer.write(text)
        except UnicodeEncodeError as ue:
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
            import codecs
            binary_data = codecs.encode(norm_text, "utf_8")
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
                gzip_writer.write(binary_data)
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
            raise e

    #------------------------------------------------------------------------------
    # [ write method ]
    #   Universal text file writer that writes by system default or utf-8 encoded unicode if throws UnicdeEncodeError
    #   Tests: test_IO.py :: test_file_ascii_readwrite, test_file_ascii_readwrite_missing_file,
    #    test_file_utf8_write_raises_unicodeerror
    #------------------------------------------------------------------------------
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def write_utf8(self, text):
        try:
            import codecs
            f = codecs.open(self.filepath, encoding='utf_8', mode='w')
        except IOError as ioe:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for write with the write_utf8() method (Naked.toolshed.file.py).")
            raise ioe
        try:
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
            f.write(norm_text)
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to write UTF-8 encoded text to file with the write_utf8() method (Naked.toolshed.file.py).")
            raise e
        finally:
            f.close()

#------------------------------------------------------------------------------
# [ FileReader class ]
#  reads data from local files
#  filename assigned in constructor (inherited from IO class interface)
#------------------------------------------------------------------------------
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def readlines_utf8(self):
        try:
            import codecs
            with codecs.open(self.filepath, encoding='utf-8', mode='r') as uni_reader:
                modified_text_list = []
                for line in uni_reader:
                    import unicodedata
                    norm_line = unicodedata.normalize('NFKD', line) # NKFD normalization of the unicode data before use
                    modified_text_list.append(norm_line)
                return modified_text_list
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
            raise e

    #------------------------------------------------------------------------------
    # [ read_gzip ] (byte string)
    #   reads data from a gzip compressed file
    #   returns the decompressed binary data from the file
    #   Note: if decompressing unicode file, set encoding="utf-8"
    #   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
    #              test_file_read_gzip_missing_file
    #------------------------------------------------------------------------------
file.py 文件源码 项目:noc-orchestrator 作者: DirceuSilvaLabs 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def read_utf8(self):
        try:
            import codecs
            f = codecs.open(self.filepath, encoding='utf_8', mode='r')
        except IOError as ioe:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
            raise ioe
        try:
            textstring = f.read()
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', textstring) # NKFD normalization of the unicode data before returns
            return norm_text
        except Exception as e:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
            raise e
        finally:
            f.close()
workflow.py 文件源码 项目:workflows.kyoyue 作者: wizyoung 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def fold_to_ascii(self, text):
        """Convert non-ASCII characters to closest ASCII equivalent.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
        return unicode(unicodedata.normalize('NFKD',
                       text).encode('ascii', 'ignore'))
workflow.py 文件源码 项目:alphy 作者: maximepeschard 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def fold_to_ascii(self, text):
        """Convert non-ASCII characters to closest ASCII equivalent.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
        return unicode(unicodedata.normalize('NFKD',
                       text).encode('ascii', 'ignore'))
formats.py 文件源码 项目:CodingDojo 作者: ComputerSocietyUNB 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def sanitize_separators(value):
    """
    Sanitizes a value according to the current decimal and
    thousand separator setting. Used with form field input.
    """
    if settings.USE_L10N and isinstance(value, six.string_types):
        parts = []
        decimal_separator = get_format('DECIMAL_SEPARATOR')
        if decimal_separator in value:
            value, decimals = value.split(decimal_separator, 1)
            parts.append(decimals)
        if settings.USE_THOUSAND_SEPARATOR:
            thousand_sep = get_format('THOUSAND_SEPARATOR')
            if thousand_sep == '.' and value.count('.') == 1 and len(value.split('.')[-1]) != 3:
                # Special case where we suspect a dot meant decimal separator (see #22171)
                pass
            else:
                for replacement in {
                        thousand_sep, unicodedata.normalize('NFKD', thousand_sep)}:
                    value = value.replace(replacement, '')
        parts.append(value)
        value = '.'.join(reversed(parts))
    return value
text.py 文件源码 项目:CodingDojo 作者: ComputerSocietyUNB 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def chars(self, num, truncate=None, html=False):
        """
        Returns the text truncated to be no longer than the specified number
        of characters.

        Takes an optional argument of what should be used to notify that the
        string has been truncated, defaulting to a translatable string of an
        ellipsis (...).
        """
        length = int(num)
        text = unicodedata.normalize('NFC', self._wrapped)

        # Calculate the length to truncate to (max length - end_text length)
        truncate_len = length
        for char in self.add_truncation_text('', truncate):
            if not unicodedata.combining(char):
                truncate_len -= 1
                if truncate_len == 0:
                    break
        if html:
            return self._truncate_html(length, truncate, text, truncate_len, False)
        return self._text_chars(length, truncate, text, truncate_len)
LcdPicon.py 文件源码 项目:enigma2 作者: OpenLD 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def getLcdPiconName(serviceName):
    #remove the path and name fields, and replace ':' by '_'
    sname = '_'.join(GetWithAlternative(serviceName).split(':', 10)[:10])
    pngname = findLcdPicon(sname)
    if not pngname:
        fields = sname.split('_', 3)
        if len(fields) > 2 and fields[2] != '1': #fallback to 1 for services with different service types
            fields[2] = '1'
        if len(fields) > 0 and fields[0] != '1': #fallback to 1 for other reftypes
            fields[0] = '1'
        pngname = findLcdPicon('_'.join(fields))
    if not pngname: # picon by channel name
        name = ServiceReference(serviceName).getServiceName()
        name = unicodedata.normalize('NFKD', unicode(name, 'utf_8', errors='ignore')).encode('ASCII', 'ignore')
        name = re.sub('[^a-z0-9]', '', name.replace('&', 'and').replace('+', 'plus').replace('*', 'star').lower())
        if len(name) > 0:
            pngname = findLcdPicon(name)
            if not pngname and len(name) > 2 and name.endswith('hd'):
                pngname = findLcdPicon(name[:-2])
    return pngname
PiconL.py 文件源码 项目:enigma2 作者: OpenLD 项目源码 文件源码 阅读 66 收藏 0 点赞 0 评论 0
def getPiconLName(serviceName):
    #remove the path and name fields, and replace ':' by '_'
    sname = '_'.join(GetWithAlternative(serviceName).split(':', 10)[:10])
    pngname = findPiconL(sname)
    if not pngname:
        fields = sname.split('_', 3)
        if len(fields) > 2 and fields[2] != '2':
            #fallback to 1 for tv services with nonstandard servicetypes
            fields[2] = '1'
            pngname = findPiconL('_'.join(fields))
    if not pngname: # picon by channel name
        name = ServiceReference(serviceName).getServiceName()
        name = unicodedata.normalize('NFKD', unicode(name, 'utf_8', errors='ignore')).encode('ASCII', 'ignore')
        excludeChars = ['/', '\\', '\'', '"', '`', '?', ' ', '(', ')', ':', '<', '>', '|', '.', '\n']
        name = re.sub('[%s]' % ''.join(excludeChars), '', name)
        name = name.replace('&', 'and')
        name = name.replace('+', 'plus')
        name = name.replace('*', 'star')
        name = name.lower()
        if len(name) > 0:
            pngname = findPicon(name)
            if not pngname and len(name) > 2 and name.endswith('hd'):
                pngname = findPicon(name[:-2])
    return pngname
formatter.py 文件源码 项目:fscan 作者: danielmoraes 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def weekday_portuguese_to_english(string):
    string = string.lower()
    string = string.strip()
    string = string.replace("-", " ")
    string = ''.join((c for c in unicodedata.normalize('NFD', string)
                      if unicodedata.category(c) != 'Mn'))

    string = string.replace(",", " ")
    string = string.split(" ")[0]
    if string in [u"dom", u"domingo"]:
        return "Sunday"
    elif string in [u"seg", u"segunda", u"segunda-feira"]:
        return "Monday"
    elif string in [u"ter", u"terca", u"terça", u"terca-feira", u"terça-feira"]:
        return "Tuesday"
    elif string in [u"qua", u"quarta", u"quarta-feira"]:
        return "Wednesday"
    elif string in [u"qui", u"quinta", u"quinta-feira"]:
        return "Thursday"
    elif string in [u"sex", u"sexta", u"sexta-feira"]:
        return "Friday"
    elif string in [u"sab", u"sáb", u"sabado", u"sábado"]:
        return "Saturday"
utils.py 文件源码 项目:script.skin.helper.skinbackup 作者: marcelveldt 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def normalize_string(text):
    '''normalize string, strip all special chars'''
    text = text.replace(":", "")
    text = text.replace("/", "-")
    text = text.replace("\\", "-")
    text = text.replace("<", "")
    text = text.replace(">", "")
    text = text.replace("*", "")
    text = text.replace("?", "")
    text = text.replace('|', "")
    text = text.replace('(', "")
    text = text.replace(')', "")
    text = text.replace("\"", "")
    text = text.strip()
    text = text.rstrip('.')
    if not isinstance(text, unicode):
        text = text.decode("utf-8")
    text = unicodedata.normalize('NFKD', text)
    return text
__init__.py 文件源码 项目:GAMADV-XTD 作者: taers232c 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def to_unicode(source, encoding="utf-8", param="value"):
    """Helper to normalize input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding to use when decoding bytes instances.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * returns unicode strings unchanged.
        * returns bytes strings decoded using *encoding*
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    elif isinstance(source, bytes):
        return source.decode(encoding)
    else:
        raise ExpectedStringError(source, param)
utils.py 文件源码 项目:MOSFiT 作者: guillochon 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def slugify(value, allow_unicode=False):
    """Slugify string to make it a valid filename.

    Convert to ASCII if 'allow_unicode' is False. Convert spaces to hyphens.
    Remove characters that aren't alphanumerics, underscores, or hyphens.
    Also strip leading and trailing whitespace.
    """
    import unicodedata
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        value = unicodedata.normalize('NFKD', value).encode(
            'ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value).strip()
    return re.sub(r'[-\s]+', '-', value)


# Below from
# http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
workflow.py 文件源码 项目:GoToMeetingTools 作者: plongitudes 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def fold_to_ascii(self, text):
        """Convert non-ASCII characters to closest ASCII equivalent.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
        return unicode(unicodedata.normalize('NFKD',
                       text).encode('ascii', 'ignore'))
misc.py 文件源码 项目:lagbot 作者: mikevb1 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def charinfo(self, ctx, *, chars):
        """Get unicode character info."""
        if not chars:
            return
        chars = unicodedata.normalize('NFC', chars)
        if len(chars) > 25:
            await ctx.send('Too many emoji.')
            return
        embed = discord.Embed()
        for char in chars:
            uc = hex(ord(char))[2:]
            name = unicodedata.name(char, 'unknown')
            if name in {'SPACE', 'EM QUAD', 'EN QUAD'} or ' SPACE' in name:
                char = '" "'
            short = len(uc) <= 4
            code = f'`\\{"u" if short else "U"}{uc.lower().zfill(4 if short else 8)}`'
            embed.add_field(name=name,
                            value=f'{char} [{code}](http://www.fileformat.info/info/unicode/char/{uc}/index.htm)')
        await ctx.send(embed=embed)
workflow.py 文件源码 项目:behelper 作者: istommao 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def fold_to_ascii(self, text):
        """Convert non-ASCII characters to closest ASCII equivalent.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
        return unicode(unicodedata.normalize('NFKD',
                       text).encode('ascii', 'ignore'))
workflow.py 文件源码 项目:radar 作者: amoose136 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def fold_to_ascii(self, text):
        """Convert non-ASCII characters to closest ASCII equivalent.

        .. versionadded:: 1.3

        .. note:: This only works for a subset of European languages.

        :param text: text to convert
        :type text: ``unicode``
        :returns: text containing only ASCII characters
        :rtype: ``unicode``

        """
        if isascii(text):
            return text
        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
        return unicode(unicodedata.normalize('NFKD',
                       text).encode('ascii', 'ignore'))


问题


面经


文章

微信
公众号

扫码关注公众号