Python re.UNICODE usage examples (source code)

regex.py (project: mongodb-monitoring, author: jruaux)
def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE

    return flags
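
A quick round-trip check of the mapping above (illustrative only):

import re

assert str_flags_to_int("") == 0
assert str_flags_to_int("imu") == re.IGNORECASE | re.MULTILINE | re.UNICODE
assert str_flags_to_int("sx") == re.DOTALL | re.VERBOSE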
__init__.py (project: mongodb-monitoring, author: jruaux)
def _encode_regex(name, value, dummy0, dummy1):
    """Encode a python regex or bson.regex.Regex."""
    flags = value.flags
    # Python 2 common case
    if flags == 0:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"\x00"
    # Python 3 common case
    elif flags == re.UNICODE:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"u\x00"
    else:
        sflags = b""
        if flags & re.IGNORECASE:
            sflags += b"i"
        if flags & re.LOCALE:
            sflags += b"l"
        if flags & re.MULTILINE:
            sflags += b"m"
        if flags & re.DOTALL:
            sflags += b"s"
        if flags & re.UNICODE:
            sflags += b"u"
        if flags & re.VERBOSE:
            sflags += b"x"
        sflags += b"\x00"
        return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags
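
Roughly how this renders a compiled pattern, using a simplified stand-in for pymongo's internal `_make_c_string_check` helper (the real one also rejects embedded NUL bytes); `name` is assumed to be the already-encoded element name:

import re

def _make_c_string_check(string):
    # simplified stand-in: UTF-8 encode and NUL-terminate
    return string.encode("utf-8") + b"\x00"

pattern = re.compile("^foo", re.IGNORECASE)   # str patterns imply re.UNICODE on Python 3
print(_encode_regex(b"f\x00", pattern, None, None))
# b'\x0bf\x00^foo\x00iu\x00'  (type byte 0x0B, name, pattern, flag characters)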
base.py (project: Flask_Blog, author: sugarguo)
def __init__(self, **kwargs):
        """Construct a TINYTEXT.

        :param charset: Optional, a column-level character set for this string
          value.  Takes precedence to 'ascii' or 'unicode' short-hand.

        :param collation: Optional, a column-level collation for this string
          value.  Takes precedence to 'binary' short-hand.

        :param ascii: Defaults to False: short-hand for the ``latin1``
          character set, generates ASCII in schema.

        :param unicode: Defaults to False: short-hand for the ``ucs2``
          character set, generates UNICODE in schema.

        :param national: Optional. If true, use the server's configured
          national character set.

        :param binary: Defaults to False: short-hand, pick the binary
          collation type that matches the column's character set.  Generates
          BINARY in schema.  This does not affect the type of data stored,
          only the collation of character data.

        """
        super(TINYTEXT, self).__init__(**kwargs)
base.py (project: Flask_Blog, author: sugarguo)
def __init__(self, **kwargs):
        """Construct a MEDIUMTEXT.

        :param charset: Optional, a column-level character set for this string
          value.  Takes precedence to 'ascii' or 'unicode' short-hand.

        :param collation: Optional, a column-level collation for this string
          value.  Takes precedence to 'binary' short-hand.

        :param ascii: Defaults to False: short-hand for the ``latin1``
          character set, generates ASCII in schema.

        :param unicode: Defaults to False: short-hand for the ``ucs2``
          character set, generates UNICODE in schema.

        :param national: Optional. If true, use the server's configured
          national character set.

        :param binary: Defaults to False: short-hand, pick the binary
          collation type that matches the column's character set.  Generates
          BINARY in schema.  This does not affect the type of data stored,
          only the collation of character data.

        """
        super(MEDIUMTEXT, self).__init__(**kwargs)
base.py (project: Flask_Blog, author: sugarguo)
def __init__(self, **kwargs):
        """Construct a LONGTEXT.

        :param charset: Optional, a column-level character set for this string
          value.  Takes precedence to 'ascii' or 'unicode' short-hand.

        :param collation: Optional, a column-level collation for this string
          value.  Takes precedence to 'binary' short-hand.

        :param ascii: Defaults to False: short-hand for the ``latin1``
          character set, generates ASCII in schema.

        :param unicode: Defaults to False: short-hand for the ``ucs2``
          character set, generates UNICODE in schema.

        :param national: Optional. If true, use the server's configured
          national character set.

        :param binary: Defaults to False: short-hand, pick the binary
          collation type that matches the column's character set.  Generates
          BINARY in schema.  This does not affect the type of data stored,
          only the collation of character data.

        """
        super(LONGTEXT, self).__init__(**kwargs)
base.py (project: Flask_Blog, author: sugarguo)
def __init__(self, length=None, **kwargs):
        """Construct a VARCHAR.

        :param charset: Optional, a column-level character set for this string
          value.  Takes precedence to 'ascii' or 'unicode' short-hand.

        :param collation: Optional, a column-level collation for this string
          value.  Takes precedence to 'binary' short-hand.

        :param ascii: Defaults to False: short-hand for the ``latin1``
          character set, generates ASCII in schema.

        :param unicode: Defaults to False: short-hand for the ``ucs2``
          character set, generates UNICODE in schema.

        :param national: Optional. If true, use the server's configured
          national character set.

        :param binary: Defaults to False: short-hand, pick the binary
          collation type that matches the column's character set.  Generates
          BINARY in schema.  This does not affect the type of data stored,
          only the collation of character data.

        """
        super(VARCHAR, self).__init__(length=length, **kwargs)
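
These keyword arguments are shared by the TINYTEXT, MEDIUMTEXT, LONGTEXT and VARCHAR constructors above. A minimal usage sketch with SQLAlchemy's MySQL dialect (assumes SQLAlchemy is installed; table and column names are made up):

from sqlalchemy import Column, Integer, MetaData, Table
from sqlalchemy.dialects import mysql
from sqlalchemy.schema import CreateTable

metadata = MetaData()
posts = Table(
    "posts", metadata,
    Column("id", Integer, primary_key=True),
    Column("title", mysql.VARCHAR(length=200, charset="utf8mb4")),
    Column("body", mysql.LONGTEXT(unicode=True)),  # short-hand for the ucs2 character set
)
print(CreateTable(posts).compile(dialect=mysql.dialect()))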
_stdlib.py (project: senf, author: quodlibet)
def expandvars(path):
    """
    Args:
        path (pathlike): A path to expand
    Returns:
        `fsnative`

    Like :func:`python:os.path.expandvars` but supports unicode under Windows
    + Python 2 and always returns a `fsnative`.
    """

    path = path2fsn(path)

    def repl_func(match):
        return environ.get(match.group(1), match.group(0))

    path = re.compile(r"\$(\w+)", flags=re.UNICODE).sub(repl_func, path)
    if os.name == "nt":
        path = re.sub(r"%([^%]+)%", repl_func, path)
    return re.sub(r"\$\{([^\}]+)\}", repl_func, path)
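
Typical usage (illustrative; assumes the public senf API, and results depend on the environment):

import os
from senf import expandvars

print(expandvars(u"$HOME/music"))            # e.g. /home/alice/music
print(expandvars(u"${LANG}.txt"))            # e.g. en_US.UTF-8.txt
if os.name == "nt":
    print(expandvars(u"%APPDATA%\\MyApp"))   # %VAR% syntax is only expanded on Windows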
utils.py (project: quartz-browser, author: ksharindam)
def safe_filename(text, max_length=200):
    """Sanitizes filenames for many operating systems.

    :params text: The unsanitized pending filename.
    """

    # Tidy up ugly formatted filenames.
    text = text.replace('_', ' ')
    text = text.replace(':', ' -')

    # NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F).
    ntfs = [re.escape(chr(i)) for i in range(0, 32)]

    # Removing these SHOULD make most filenames safe for a wide range of
    # operating systems.
    paranoid = [re.escape(c) for c in '"#$%\'*,./:;<>?\\^|~']

    blacklist = re.compile('|'.join(ntfs + paranoid), re.UNICODE)
    filename = blacklist.sub('', text)
    return truncate(filename)
inlinepatterns.py (project: sublime-text-3-packages, author: nickjj)
def __init__(self, pattern, markdown_instance=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern

        """
        self.pattern = pattern
        self.compiled_re = re.compile("^(.*?)%s(.*)$" % pattern,
                                      re.DOTALL | re.UNICODE)

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False
        if markdown_instance:
            self.markdown = markdown_instance
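
To see what the wrapping regex does, a small sketch with an emphasis-style pattern (the \2 backreference is written for the wrapped form, where the pattern's own groups are shifted by one):

import re

EMPH_RE = r'(\*)([^\*]+)\2'   # inline pattern in the style of Markdown's EMPH_RE
wrapped = re.compile("^(.*?)%s(.*)$" % EMPH_RE, re.DOTALL | re.UNICODE)

m = wrapped.match(u"some *emphasised* text")
print(m.group(1))   # 'some '       text before the inline element
print(m.group(3))   # 'emphasised'  the pattern's own content group
print(m.group(4))   # ' text'       text after the inline element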
urlresolvers.py (project: CodingDojo, author: ComputerSocietyUNB)
def regex(self):
        """
        Returns a compiled regular expression, depending upon the activated
        language-code.
        """
        language_code = get_language()
        if language_code not in self._regex_dict:
            if isinstance(self._regex, six.string_types):
                regex = self._regex
            else:
                regex = force_text(self._regex)
            try:
                compiled_regex = re.compile(regex, re.UNICODE)
            except re.error as e:
                raise ImproperlyConfigured(
                    '"%s" is not a valid regular expression: %s' %
                    (regex, six.text_type(e)))

            self._regex_dict[language_code] = compiled_regex
        return self._regex_dict[language_code]
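
The same idea in isolation, as a simplified sketch (not Django's actual class): compile the active language's pattern once and cache the compiled object.

import re

class LocaleRegex(object):
    def __init__(self, patterns, default):
        self._patterns = patterns        # per-language URL patterns (made-up example data)
        self._default = default
        self._regex_dict = {}

    def regex(self, language_code):
        # Compile lazily, once per language code.
        if language_code not in self._regex_dict:
            pattern = self._patterns.get(language_code, self._default)
            self._regex_dict[language_code] = re.compile(pattern, re.UNICODE)
        return self._regex_dict[language_code]

urls = LocaleRegex({'en': r'^news/$', 'fr': r'^actualites/$'}, default=r'^news/$')
print(bool(urls.regex('fr').match('actualites/')))   # True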
__init__.py (project: gui_tool, author: UAVCAN)
def _do_match(self, text):
        if self.use_regex:
            try:
                flags = re.UNICODE
                if not self.case_sensitive:
                    flags |= re.IGNORECASE
                return bool(re.findall(self.pattern, text, flags=flags))
            except Exception as ex:
                logger.warning('Regular expression match failed', exc_info=True)
                raise self.BadPatternException(str(ex))
        else:
            if self.case_sensitive:
                pattern = self.pattern
            else:
                pattern = self.pattern.lower()
                text = text.lower()
            return pattern in text
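
The same decision logic, extracted into a standalone helper for illustration (a sketch, not the original class):

import re

def matches(pattern, text, use_regex=False, case_sensitive=False):
    if use_regex:
        flags = re.UNICODE
        if not case_sensitive:
            flags |= re.IGNORECASE
        return bool(re.findall(pattern, text, flags=flags))
    if not case_sensitive:
        pattern, text = pattern.lower(), text.lower()
    return pattern in text

print(matches(r"node\s+\d+", "Node 42 is online", use_regex=True))  # True
print(matches("ONLINE", "Node 42 is online"))                       # True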
woxikon_de_lookup.py (project: thesaurus_query.vim, author: Ron89)
def obtainGroups(webcontent, groupNum):
    synonym_list = []
    for group in range(groupNum):
        while not re.search("synonyms-list-group", webcontent.readline(), re.UNICODE):
            continue
        meaning = re.search("Meaning: <b>([^<]+)</b>", webcontent.readline(), re.UNICODE).group(1)
        webcontent.readline() # </div> line
        webcontent.readline() # synonyms-list_content line
        sublist = webcontent.readline().split(',')
        subSynList = []
        for wordContainer in sublist:
            potential_synonym = re.search("<a href=[^>]+>([^<]+)</a>", wordContainer, re.UNICODE)
            if potential_synonym:
                subSynList.append(potential_synonym.group(1))
        synonym_list.append([meaning, subSynList])
    return synonym_list
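
A made-up minimal page in the shape the parser expects (not real woxikon.de markup), just to show the flow:

from io import StringIO

sample = StringIO(u"\n".join([
    u'<div class="synonyms-list-group">',
    u'Meaning: <b>schnell</b>',
    u'</div>',
    u'<div class="synonyms-list_content">',
    u'<a href="/1">rasch</a>, <a href="/2">flink</a>',
]))
print(obtainGroups(sample, 1))   # [['schnell', ['rasch', 'flink']]]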
woxikon_de_lookup.py (project: thesaurus_query.vim, author: Ron89)
def _parser(webcontent):
    pointer = webcontent.tell()
    end = len(webcontent.getvalue())
    while pointer < end:
        line_curr = webcontent.readline()
        pointer = webcontent.tell()   # advance the cursor, otherwise the loop never terminates
        found = re.search("Found ([0-9]+) synonym[ a-z]+([0-9]+) group", line_curr, re.UNICODE)
        notFound = re.search("<div class=\"no-results\">", line_curr, re.UNICODE)
        if found:
            groupNum = int(found.group(2))
            synonymNum = int(found.group(1))
            synonym_list = obtainGroups(webcontent, groupNum)
            webcontent.close()
            return synonym_list
        if notFound:
            webcontent.close()
            return []

    webcontent.close()
    return []   # neither marker found in the page
reference.py (project: TuShare, author: andyzsf)
def _fun_into(x):
    if ct.PY3:
            reg1 = re.compile(r'??(.*?)?', re.UNICODE)
            reg2 = re.compile(r'??(.*?)?', re.UNICODE)
            res1 = reg1.findall(x)
            res2 = reg2.findall(x)
            res1 = 0 if len(res1)<1 else float(res1[0])
            res2 = 0 if len(res2)<1 else float(res2[0])
            return res1 + res2
    else:
        if isinstance(x, unicode):
            s1 = unicode('??','utf-8')
            s2 = unicode('??','utf-8')
            s3 = unicode('?','utf-8')
            reg1 = re.compile(r'%s(.*?)%s'%(s1, s3), re.UNICODE)
            reg2 = re.compile(r'%s(.*?)%s'%(s2, s3), re.UNICODE)
            res1 = reg1.findall(x)
            res2 = reg2.findall(x)
            res1 = 0 if len(res1)<1 else float(res1[0])
            res2 = 0 if len(res2)<1 else float(res2[0])
            return res1 + res2
        else:
            return 0
validators.py (project: touch-pay-client, author: HackPucBemobi)
def __init__(self, expression, error_message='Invalid expression',
                 strict=False, search=False, extract=False,
                 is_unicode=False):

        if strict or not search:
            if not expression.startswith('^'):
                expression = '^(%s)' % expression
        if strict:
            if not expression.endswith('$'):
                expression = '(%s)$' % expression
        if is_unicode:
            if not isinstance(expression, unicodeT):
                expression = expression.decode('utf8')
            self.regex = re.compile(expression, re.UNICODE)
        else:
            self.regex = re.compile(expression)
        self.error_message = error_message
        self.extract = extract
        self.is_unicode = is_unicode or (not(PY2))
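
What the constructor does to `expression` is easiest to see directly (an illustrative sketch of the anchoring rules, not the validator itself):

import re

expression = r'\w+'
anchored = '^(%s)' % expression      # applied when strict or not search
strict = '(%s)$' % anchored          # additionally applied when strict

print(re.compile(anchored, re.UNICODE).match(u'héllo world').group(1))  # héllo
print(re.compile(strict, re.UNICODE).match(u'héllo world'))             # None
print(bool(re.compile(strict, re.UNICODE).match(u'héllo')))             # True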
pygments.py (project: transpyler, author: Transpyler)
def transpyler_lexer_factory(transpyler):
    """
    Return a Pygments lexer class for the given transpyler.
    """

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?3(\.\d)?')

    return type(
        transpyler.pygments_class_name,
        (Python3Lexer,),
        dict(
            analyse_text=analyse_text,
            name=transpyler.name,
            aliases=[transpyler.display_name],
            filenames=transpyler.file_extensions,
            mimetypes=transpyler.mimetypes,
            flags=re.MULTILINE | re.UNICODE,
            uni_name="[%s][%s]*" % (uni.xid_start, uni.xid_continue),
            tokens=make_transpyled_tokens(transpyler),
        )
    )
states.py (project: aws-cfn-plex, author: lordmuffin)
def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if type(part) is tuple:
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
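
For example (illustrative definition; docutils passes its inline-markup definitions here):

definition = ('initial', '', '',
              [r'\d+', ('word', '', '', [r'[A-Za-z]+'])])
print(build_regexp(definition, compile=False))
# (?P<initial>\d+|(?P<word>[A-Za-z]+))
print(bool(build_regexp(definition).match(u'abc')))   # True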
string_test.py (project: filters, author: eflglobal)
def test_pass_precompiled_regex(self):
        """
        You can alternatively provide a precompiled regex to the Filter
        instead of a string pattern.
        """
        # Compile our own pattern so that we can specify the
        # ``IGNORECASE`` flag.
        # Note that you are responsible for adding the ``UNICODE`` flag
        # to your compiled regex!
        # noinspection SpellCheckingInspection
        pattern = re.compile(r'\btest\b', re.IGNORECASE | re.UNICODE)

        self.assertFilterPasses(
            self._filter('test march of the TEST penguins', pattern=pattern),
            ['test', 'TEST'],
        )
string_test.py (project: filters, author: eflglobal)
def test_pass_regex_library_support(self):
        """
        The Regex Filter also supports precompiled patterns using the
        ``regex`` library.
        """
        # Roughly, "Hi there!" in Burmese.
        word = '\u101f\u102d\u102f\u1004\u103a\u1038'

        # Note that :py:func:`regex.compile` automatically adds the
        # ``UNICODE`` flag for you when the pattern is a unicode.
        pattern = regex.compile(r'\w+')

        self.assertFilterPasses(
            self._filter(word, pattern=pattern),
            [word],
        )
string_test.py (project: filters, author: eflglobal)
def test_pass_precompiled_regex(self):
        """
        You can alternatively provide a precompiled regex to the Filter
        instead of a string pattern.
        """
        # Compile our own pattern so that we can specify the
        # ``IGNORECASE`` flag.
        # Note that you are responsible for adding the ``UNICODE`` flag
        # to your compiled regex!
        # noinspection SpellCheckingInspection
        pattern = re.compile(r'\btest\b', re.IGNORECASE | re.UNICODE)

        self.assertFilterPasses(
            self._filter('test march of the TEST penguins', pattern=pattern),
            ['', ' march of the ', ' penguins'],
        )
string_test.py (project: filters, author: eflglobal)
def test_pass_regex_library_support(self):
        """
        The Regex Filter also supports precompiled patterns using the
        ``regex`` library.
        """
        # Roughly, "Hi there!" in Burmese.
        word = '\u101f\u102d\u102f\u1004\u103a\u1038!'

        # Note that :py:func:`regex.compile` automatically adds the
        # ``UNICODE`` flag for you when the pattern is a unicode.
        pattern = regex.compile(r'\w+')

        self.assertFilterPasses(
            self._filter(word, pattern=pattern),
            ['', '!'],
        )