python类lookup()的实例源码

utilities.py 文件源码 项目:civet 作者: TheJacksonLaboratory 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cleanup_command_line():
    """Replace typographic dashes and quotes in ``sys.argv`` with ASCII.

    Every element of ``sys.argv`` is rewritten in place: EN DASH becomes
    ``-``, EM DASH becomes ``--`` and the curly single/double quotation
    marks become their straight ASCII counterparts.

    Nothing is done when ``sys.stdin.encoding`` is unset or ``'ascii'``.

    Byte-string arguments (the Python 2 case) are decoded with the stdin
    encoding, cleaned, and re-encoded with the ``'replace'`` error handler.
    Text arguments (the Python 3 case) are cleaned directly and stay text.
    """
    encoding = sys.stdin.encoding
    if not encoding or encoding == 'ascii':
        return

    conversion_pairs = {
        'EN DASH': '-',
        'EM DASH': '--',
        'LEFT DOUBLE QUOTATION MARK': '"',
        'RIGHT DOUBLE QUOTATION MARK': '"',
        'LEFT SINGLE QUOTATION MARK': "'",
        'RIGHT SINGLE QUOTATION MARK': "'",
    }
    # Resolve the character names once rather than once per argument.
    replacements = [
        (unicodedata.lookup(char_name), plain)
        for char_name, plain in conversion_pairs.items()
    ]

    for i, arg in enumerate(sys.argv):
        # Python 2 hands us byte strings; Python 3 hands us text already.
        was_bytes = isinstance(arg, bytes)
        decoded = arg.decode(encoding) if was_bytes else arg
        for fancy, plain in replacements:
            decoded = decoded.replace(fancy, plain)
        # Should we be doing 'strict' here instead of 'replace'?
        if was_bytes:
            sys.argv[i] = decoded.encode(encoding, 'replace')
        else:
            sys.argv[i] = decoded
_regex_core.py 文件源码 项目:oa_qian 作者: sunqb 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def parse_repl_named_char(source):
    """Parse a ``{NAME}`` character reference in a replacement string.

    Returns the code point (an int) of the named character.  When the text
    at the current position is not a complete ``{...}`` group, the position
    of *source* is restored and None is returned.  Raises ``error`` when the
    braces are well formed but the name is unknown to ``unicodedata``.
    """
    start = source.pos

    if not source.match("{"):
        source.pos = start
        return None

    char_name = source.get_while(ALPHA | set(" "))
    if not source.match("}"):
        # No closing brace: rewind so the caller can re-read the text.
        source.pos = start
        return None

    try:
        char = unicodedata.lookup(char_name)
    except KeyError:
        raise error("undefined character name", source.string,
          source.pos)
    return ord(char)
completer.py 文件源码 项目:leetcode 作者: thomasyimgit 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def unicode_name_matches(self, text):
    u"""Complete ``\\<UNICODE NAME>`` to the character carrying that name.

    Everything after the last backslash in *text* is looked up as a Unicode
    character name, e.g. ``\\GREEK SMALL LETTER ETA`` yields the eta
    character (U+03B7).

    A match is reported only when the character may appear in a Python 3
    identifier; combining characters qualify because the check is made on
    the character appended to ``'a'``.

    Returns a ``(matched_text, [character])`` pair, or an empty string and
    an empty list when there is no backslash, the name is unknown, or the
    character is not identifier-safe.  Used on Python 3 only.
    """
    backslash_at = text.rfind('\\')
    if backslash_at < 0:
        return u'', []

    char_name = text[backslash_at + 1:]
    try:
        char = unicodedata.lookup(char_name)
    except KeyError:
        return u'', []

    # Prefixing 'a' lets combining characters pass the identifier test.
    if ('a' + char).isidentifier():
        return '\\' + char_name, [char]
    return u'', []
sympy_parser.py 文件源码 项目:zippy 作者: securesystemslab 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _token_splittable(token):
    """
    Tell whether a token name may be broken up into several tokens.

    Tokens containing an underscore, and tokens that name a Greek letter
    (i.e. ``'GREEK SMALL LETTER ' + token`` is a known Unicode name), are
    never split.  Any other token is splittable when it is longer than one
    character.  This is used to implicitly convert expressions like 'xyz'
    into 'x*y*z'.
    """
    if '_' in token:
        return False

    try:
        unicodedata.lookup('GREEK SMALL LETTER ' + token)
    except KeyError:
        # Not a Greek letter name: only multi-character tokens can split.
        return len(token) > 1

    # The lookup succeeded, so the token names a Greek letter.
    return False
fun_stuff.py 文件源码 项目:espedite 作者: idimitrakopoulos 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __init__(self, msg='', maxspin=0, minspin=10, speed=5):
    """Set up the spinner state and initialise the underlying thread.

    msg     -- message to print first, stored as ``self.msg``
    maxspin -- stored as ``self.max``
    minspin -- stored as ``self.min``
    speed   -- number of spins a second; the wait between frames is
               ``1.0 / (speed * 4)`` seconds
    """
    # Spin counter, output stream and stop flag.
    self.count = 0
    self.out = sys.stdout
    self.flag = False

    self.max = maxspin
    self.min = minspin

    # Optional leading message and the complete printed string.
    self.msg = msg
    self.string = ''

    # Four frames per spin, so divide one second by speed * 4.
    self.waittime = 1.0 / float(speed * 4)

    # The unicode dash character does not show up properly in the
    # Windows console, so only POSIX systems get FIGURE DASH.
    if os.name == 'posix':
        dash = unicodedata.lookup('FIGURE DASH')
    else:
        dash = u'-'
    self.spinchars = (dash, u'\\ ', u'| ', u'/ ')

    threading.Thread.__init__(self, None, None, "Spin Thread")
literals.py 文件源码 项目:chalktalk_docs 作者: loremIpsum1771 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def escape(m):
    """Return the character denoted by one backslash escape.

    *m* is a match object whose group 0 is the whole escape (starting with
    a backslash) and whose group 1 is the text after the backslash.

    Raises ValueError for an unknown ``\\N{...}`` name or a malformed
    octal escape.
    """
    whole, tail = m.group(0, 1)
    assert whole.startswith("\\")

    simple = simple_escapes.get(tail)
    if simple is not None:
        return simple

    if tail.startswith("x"):
        return chr(convert_hex(tail, 2))
    if tail.startswith('u'):
        return unichr(convert_hex(tail, 4))
    if tail.startswith('U'):
        return unichr(convert_hex(tail, 8))

    if tail.startswith('N'):
        import unicodedata
        # Drop the leading 'N' marker's brace and the closing brace.
        char_name = tail[1:-1]
        try:
            return unicodedata.lookup(char_name)
        except KeyError:
            raise ValueError("undefined character name %r" % char_name)

    # Anything left is treated as an octal escape.
    try:
        return chr(int(tail, 8))
    except ValueError:
        raise ValueError("invalid octal string escape ('\\%s')" % tail)
test_ucn.py 文件源码 项目:web_ctp 作者: molebot 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_aliases(self):
    """Name aliases resolve in the current database but not in UCD 3.2.0."""
    # Check that the aliases defined in the NameAliases.txt file work.
    # This should be updated when new aliases are added or the file
    # should be downloaded and parsed instead.  See #12753.
    alias_table = (
        ('LATIN CAPITAL LETTER GHA', 0x01A2),
        ('LATIN SMALL LETTER GHA', 0x01A3),
        ('KANNADA LETTER LLLA', 0x0CDE),
        ('LAO LETTER FO FON', 0x0E9D),
        ('LAO LETTER FO FAY', 0x0E9F),
        ('LAO LETTER RO', 0x0EA3),
        ('LAO LETTER LO', 0x0EA5),
        ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
        ('YI SYLLABLE ITERATION MARK', 0xA015),
        ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
        ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
    )
    for alias, codepoint in alias_table:
        char = chr(codepoint)
        self.checkletter(alias, char)
        # The primary name must differ from the alias yet map to the
        # same character.
        primary_name = unicodedata.name(char)
        self.assertNotEqual(primary_name, alias)
        self.assertEqual(unicodedata.lookup(alias),
                         unicodedata.lookup(primary_name))
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(alias)
test_ucn.py 文件源码 项目:web_ctp 作者: molebot 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_named_sequences_full(self):
    """Check every entry of the downloaded NamedSequences.txt file."""
    # Check all the named sequences
    url = ("http://www.unicode.org/Public/%s/ucd/NamedSequences.txt" %
           unicodedata.unidata_version)
    try:
        testdata = support.open_urlresource(url, encoding="utf-8",
                                            check=check_version)
    except (IOError, HTTPException):
        self.skipTest("Could not retrieve " + url)
    self.addCleanup(testdata.close)

    for raw_line in testdata:
        entry = raw_line.strip()
        # Skip blank lines and comments.
        if not entry or entry.startswith('#'):
            continue
        seqname, hex_codes = entry.split(';')
        expected = ''.join([chr(int(code, 16)) for code in hex_codes.split()])
        self.assertEqual(unicodedata.lookup(seqname), expected)
        with self.assertRaises(SyntaxError):
            self.checkletter(seqname, None)
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(seqname)
paraparser.py 文件源码 项目:tichu-tournament 作者: aragos 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def start_unichar(self, attr):
    """Handle a ``<unichar/>`` tag given by ``name`` or by ``code``.

    ``name`` is resolved with ``unicodedata.lookup``; ``code`` is evaluated
    and converted to a character.  Problems are reported through
    ``self._syntax_error`` and a NUL character is emitted in place of the
    unresolved one.  The resulting text (if any) goes to
    ``self.handle_data`` and the tag is pushed as self-closing.
    """
    text = None

    if 'name' in attr:
        if 'code' in attr:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            text = unicodedata.lookup(attr['name'])
        except KeyError:
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % ascii(attr['name']))
            text = '\0'
    elif 'code' in attr:
        # NOTE(review): eval() runs the attribute text as Python code; this
        # is unsafe if the markup can come from an untrusted source.
        try:
            codepoint = int(eval(attr['code']))
            text = chr(codepoint) if isPy3 else unichr(codepoint)
        except:
            self._syntax_error('<unichar/> invalid code attribute %s' % ascii(attr['code']))
            text = '\0'
    elif attr:
        # Neither name nor code: complain about the first attribute found.
        self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])

    if text is not None:
        self.handle_data(text)
    self._push('unichar',_selfClosingTag='unichar')
completer.py 文件源码 项目:Repobot 作者: Desgard 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def unicode_name_matches(self, text):
    u"""Match Latex-like syntax for unicode characters based on their name.

    The part of *text* after its last backslash is treated as a Unicode
    character name; for instance ``\\GREEK SMALL LETTER ETA`` maps to the
    eta character (U+03B7).

    Works only for characters valid in a python 3 identifier, or combining
    characters that will combine to form a valid identifier.

    Returns ``(backslash + name, [character])`` on success and an empty
    string with an empty list otherwise.  Used on Python 3 only.
    """
    _, backslash, char_name = text.rpartition('\\')
    if not backslash:
        # rpartition leaves the separator empty when none was found.
        return u'', []

    try:
        char = unicodedata.lookup(char_name)
    except KeyError:
        return u'', []

    # allow combining chars
    identifier_safe = ('a' + char).isidentifier()
    if identifier_safe:
        return '\\' + char_name, [char]
    return u'', []
test_ucn.py 文件源码 项目:ouroboros 作者: pybee 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_aliases(self):
    """Aliases must look up to the same character as the primary name."""
    # Check that the aliases defined in the NameAliases.txt file work.
    # This should be updated when new aliases are added or the file
    # should be downloaded and parsed instead.  See #12753.
    known_aliases = [
        ('LATIN CAPITAL LETTER GHA', 0x01A2),
        ('LATIN SMALL LETTER GHA', 0x01A3),
        ('KANNADA LETTER LLLA', 0x0CDE),
        ('LAO LETTER FO FON', 0x0E9D),
        ('LAO LETTER FO FAY', 0x0E9F),
        ('LAO LETTER RO', 0x0EA3),
        ('LAO LETTER LO', 0x0EA5),
        ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
        ('YI SYLLABLE ITERATION MARK', 0xA015),
        ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
        ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
    ]
    for alias_name, code in known_aliases:
        letter = chr(code)
        self.checkletter(alias_name, letter)
        official = unicodedata.name(letter)
        self.assertNotEqual(official, alias_name)
        via_alias = unicodedata.lookup(alias_name)
        via_official = unicodedata.lookup(official)
        self.assertEqual(via_alias, via_official)
        # The 3.2.0 database is not expected to know the alias.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(alias_name)
test_ucn.py 文件源码 项目:ouroboros 作者: pybee 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_named_sequences_full(self):
    """Verify lookup() against each line of NamedSequences.txt."""
    # Check all the named sequences
    url = ("http://www.pythontest.net/unicode/%s/NamedSequences.txt" %
           unicodedata.unidata_version)
    try:
        testdata = support.open_urlresource(url, encoding="utf-8",
                                            check=check_version)
    except (OSError, HTTPException):
        self.skipTest("Could not retrieve " + url)
    self.addCleanup(testdata.close)

    for raw_line in testdata:
        record = raw_line.strip()
        if record and not record.startswith('#'):
            # Each record is "<sequence name>;<space separated hex codes>".
            seqname, hex_field = record.split(';')
            chars = [chr(int(hex_code, 16)) for hex_code in hex_field.split()]
            self.assertEqual(unicodedata.lookup(seqname), ''.join(chars))
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)
latex2text.py 文件源码 项目:Menotexport 作者: Xunius 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def _greekletters(letterlist):
    """Append small and capital Greek letter macros to ``_default_macro_list``.

    For every name in *letterlist* two ``(macro_name, character)`` pairs
    are appended: the name as given with the small letter, then the name
    with its first character upper-cased with the capital letter.
    """
    # Small forms that are not taken from "GREEK SMALL LETTER <NAME>".
    small_overrides = {
        'EPSILON': "GREEK LUNATE EPSILON SYMBOL",
        'PHI': "GREEK PHI SYMBOL",
    }
    for letter in letterlist:
        unicode_name = letter.upper()
        if unicode_name == 'LAMBDA':
            # The character names use the spelling LAMDA.
            unicode_name = 'LAMDA'

        small_name = small_overrides.get(
            unicode_name, "GREEK SMALL LETTER " + unicode_name)
        _default_macro_list.append((letter, unicodedata.lookup(small_name)))

        capital_macro = letter[0].upper() + letter[1:]
        capital_char = unicodedata.lookup("GREEK CAPITAL LETTER " + unicode_name)
        _default_macro_list.append((capital_macro, capital_char))
triggerreact.py 文件源码 项目:Tobo-Cogs 作者: Tobotimus 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _check_files():
    """Create the triggers file, or upgrade the emoji entries of an existing one.

    When ``TRIGGERS_PATH`` is not valid JSON it is written out with
    ``DEFAULT_SETTINGS``.  Otherwise every entry of the ``text_triggers``
    and ``user_triggers`` lists is passed through ``lookup`` (presumably
    ``unicodedata.lookup`` -- confirm against the file's imports); entries
    for which that succeeds are replaced by the result, the rest are left
    alone, and the file is saved again.
    """
    if not dataIO.is_valid_json(TRIGGERS_PATH):
        _LOGGER.info("Creating json: " + TRIGGERS_PATH)
        dataIO.save_json(TRIGGERS_PATH, DEFAULT_SETTINGS)
        return

    # Backwards compatibility check
    triggers = dataIO.load_json(TRIGGERS_PATH)
    for section in ('text_triggers', 'user_triggers'):
        for owner, emoji_list in triggers[section].items():
            for position, entry in enumerate(emoji_list):
                try:
                    resolved = lookup(entry)
                except KeyError:
                    # lookup() does not know this entry; keep it as it is.
                    continue
                else:
                    emoji_list[position] = resolved
            triggers[section][owner] = emoji_list
    dataIO.save_json(TRIGGERS_PATH, triggers)
sympy_parser.py 文件源码 项目:Python-iBeacon-Scan 作者: NikNitro 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _token_splittable(token):
    """
    Decide whether a token name can be split into multiple tokens.

    A token can be split when it has more than one character, contains no
    underscore, and is not the name of a Greek letter.  This is used to
    implicitly convert expressions like 'xyz' into 'x*y*z'.
    """
    if '_' not in token:
        try:
            unicodedata.lookup('GREEK SMALL LETTER ' + token)
        except KeyError:
            # No such Greek letter, so only the length decides.
            return len(token) > 1
    # Either an underscore is present or the token names a Greek letter.
    return False
test_ucn.py 文件源码 项目:kbe_server 作者: xiaohaoppy 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_aliases(self):
    """Exercise name aliases against both the current and 3.2.0 databases."""
    # Check that the aliases defined in the NameAliases.txt file work.
    # This should be updated when new aliases are added or the file
    # should be downloaded and parsed instead.  See #12753.
    aliases = [
        ('LATIN CAPITAL LETTER GHA', 0x01A2),
        ('LATIN SMALL LETTER GHA', 0x01A3),
        ('KANNADA LETTER LLLA', 0x0CDE),
        ('LAO LETTER FO FON', 0x0E9D),
        ('LAO LETTER FO FAY', 0x0E9F),
        ('LAO LETTER RO', 0x0EA3),
        ('LAO LETTER LO', 0x0EA5),
        ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
        ('YI SYLLABLE ITERATION MARK', 0xA015),
        ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
        ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
    ]
    for pair in aliases:
        alias, codepoint = pair
        expected_char = chr(codepoint)
        self.checkletter(alias, expected_char)
        # An alias is by definition not the name reported by name().
        real_name = unicodedata.name(expected_char)
        self.assertNotEqual(real_name, alias)
        self.assertEqual(unicodedata.lookup(alias),
                         unicodedata.lookup(real_name))
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(alias)
test_ucn.py 文件源码 项目:kbe_server 作者: xiaohaoppy 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_named_sequences_full(self):
    """Compare lookup() with every sequence listed in NamedSequences.txt."""
    # Check all the named sequences
    url = ("http://www.unicode.org/Public/%s/ucd/NamedSequences.txt" %
           unicodedata.unidata_version)
    try:
        testdata = support.open_urlresource(url, encoding="utf-8",
                                            check=check_version)
    except (OSError, HTTPException):
        self.skipTest("Could not retrieve " + url)
    self.addCleanup(testdata.close)

    for text_line in testdata:
        stripped = text_line.strip()
        is_comment = stripped.startswith('#')
        if not stripped or is_comment:
            continue
        seqname, code_field = stripped.split(';')
        # Turn the hexadecimal code list into the string it denotes.
        sequence = ''
        for hex_code in code_field.split():
            sequence += chr(int(hex_code, 16))
        self.assertEqual(unicodedata.lookup(seqname), sequence)
        with self.assertRaises(SyntaxError):
            self.checkletter(seqname, None)
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(seqname)
completer.py 文件源码 项目:blender 作者: gastrodia 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def unicode_name_matches(self, text):
    u"""Resolve a trailing ``\\NAME`` in *text* to a unicode character.

    ``\\GREEK SMALL LETTER ETA`` resolves to the eta character (U+03B7).

    Only characters that keep ``'a' + character`` a valid python 3
    identifier are accepted, which also admits combining characters.

    The result is a 2-tuple: the matched ``\\NAME`` text and a one-element
    list holding the character, or an empty string and an empty list when
    nothing matches.  Used on Python 3 only.
    """
    matched, candidates = u'', []

    slash_index = text.rfind('\\')
    if slash_index > -1:
        name = text[slash_index + 1:]
        try:
            char = unicodedata.lookup(name)
        except KeyError:
            pass
        else:
            # allow combining chars
            if ('a' + char).isidentifier():
                matched, candidates = '\\' + name, [char]

    return matched, candidates
completer.py 文件源码 项目:yatta_reader 作者: sound88 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def unicode_name_matches(self, text):
    u"""Match Latex-like ``\\NAME`` syntax against unicode character names.

    The name is whatever follows the last backslash of *text*; for example
    ``\\GREEK SMALL LETTER ETA`` gives the eta character (U+03B7).

    The character is offered only if it is valid inside a python 3
    identifier.  Combining characters are allowed since the test is done
    on ``'a'`` followed by the character.

    Returns the matched text with a one-item list of the character, or an
    empty string with an empty list.  Used on Python 3 only.
    """
    pieces = text.rsplit('\\', 1)
    if len(pieces) < 2:
        # No backslash anywhere in the text.
        return u'', []

    name = pieces[1]
    try:
        char = unicodedata.lookup(name)
    except KeyError:
        return u'', []

    return ('\\' + name, [char]) if ('a' + char).isidentifier() else (u'', [])
recipe-286155.py 文件源码 项目:code 作者: ActiveState 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def insert_accented(self, c, accent):
    """Insert the accented form of the letter *c* at the insertion point.

    The character is found by name ("latin <capital|small> letter <c> with
    <accent>").  Returns "break" after inserting it, which tells Tk to stop
    handling the event so the plain letter is not inserted as well.
    Returns None, inserting nothing, when *c* is not alphabetic or no such
    accented character exists.
    """
    if not c.isalpha():
        return None

    cap = 'capital' if c.isupper() else 'small'
    try:
        accented = lookup("latin %s letter %c with %s" % (cap, c, accent))
    except KeyError:
        # This letter has no form with the requested accent.
        return None

    self.insert(INSERT, accented)
    # Prevent plain letter from being inserted too, tell Tk to
    # stop handling this event
    return "break"
keymaps.py 文件源码 项目:bittyband 作者: yam655 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def expand_unicode(s):
    r""" Convert a unicode reference in to a Unicode string.

    Recognised forms (the backslash is a literal character of *s*):

    * ``\uXXXX`` or ``\UXXXXXXXX`` -- everything after the two-character
      prefix is read as a hexadecimal code point
    * ``\N{NAME}`` -- a Unicode character name

    Any other string is returned unchanged.

    Raises ConfigError when the name is unknown, and ValueError when the
    hexadecimal digits are malformed.
    """
    if s.startswith((r'\u', r'\U')):
        # Strip the prefix first: int() cannot parse the backslash and letter.
        return chr(int(s[2:], 16))
    if s.startswith(r'\N{'):
        name = s[3:-1]
        try:
            return unicodedata.lookup(name)
        except KeyError:
            raise ConfigError("Failed to find unicode value with name {}\n".format(name))
    return s
util.py 文件源码 项目:llk 作者: Tycx2ry 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def u(s):
    """Build a Unicode string from *s*, expanding textual Unicode escapes.

    This is expected to work in the same way as u'<string>' would work in
    Python 2.x (although it is not completely robust as it is based on a
    simple set of regexps).
    """
    def _from_hex(match):
        # Shared by the 16-bit and 32-bit patterns: hex digits -> character.
        return unichr(int(match.group('hexval'), 16))

    def _from_name(match):
        return unicodedata.lookup(match.group('name'))

    expanded = re.sub(_U16_RE, _from_hex, unicode(s))
    expanded = re.sub(_U32_RE, _from_hex, expanded)
    expanded = re.sub(_UNAME_RE, _from_name, expanded)
    return expanded
util.py 文件源码 项目:spiderfoot 作者: wi-fi-analyzer 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def u(s):
    """Return *s* as a Unicode string with its escape sequences expanded.

    This is expected to work in the same way as u'<string>' would work in
    Python 2.x (although it is not completely robust as it is based on a
    simple set of regexps).
    """
    hex_to_char = lambda m: unichr(int(m.group('hexval'), 16))
    name_to_char = lambda m: unicodedata.lookup(m.group('name'))

    # Apply the 16-bit, 32-bit and named-character patterns in that order.
    text = re.sub(_U16_RE, hex_to_char, unicode(s))
    text = re.sub(_U32_RE, hex_to_char, text)
    return re.sub(_UNAME_RE, name_to_char, text)
coq_install_celex.py 文件源码 项目:coquery 作者: gkunter 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def dia_to_unicode(s):
    """
    Convert CELEX diacritic encodings in a string to Unicode.

    A diacritic marker applies to the character that follows it; the pair
    is composed into a single character where NFC normalisation allows.
    Only the most recent marker is kept when several precede a character,
    and a marker with nothing after it is dropped.

    Parameters
    ----------
    s : string
        A string containing CELEX diacritics (see CELEX/english/eol/README
        for details)

    Returns
    -------
    s : string
        The corresponding unicode string
    """
    marker_names = {
        "#": "COMBINING ACUTE ACCENT",
        "`": "COMBINING GRAVE ACCENT",
        '"': "COMBINING DIAERESIS",
        "^": "COMBINING CIRCUMFLEX ACCENT",
        ",": "COMBINING CEDILLA",
        "~": "COMBINING TILDE",
        "@": "COMBINING RING ABOVE"}

    pending_mark = None
    pieces = []
    for ch in s:
        mark_name = marker_names.get(ch)
        if mark_name is not None:
            # Remember the mark until the character it modifies arrives.
            pending_mark = unicodedata.lookup(mark_name)
            continue
        pieces.append(ch)
        if pending_mark:
            pieces.append(pending_mark)
            pending_mark = None

    # join and normalize characters:
    return unicodedata.normalize("NFC", "".join(pieces))
_regex_core.py 文件源码 项目:oa_qian 作者: sunqb 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def parse_named_char(source, info, in_set):
    """Parse a ``{NAME}`` named character.

    Returns the character node built by ``make_character`` for the named
    code point.  When no complete ``{...}`` group is present the position
    of *source* is restored and a node for a literal "N" is returned.
    Raises ``error`` when the name inside the braces is unknown.
    """
    start = source.pos

    def literal_n():
        # Not a named character after all: rewind and treat it as "N".
        source.pos = start
        return make_character(info, ord("N"), in_set)

    if not source.match("{"):
        return literal_n()

    char_name = source.get_while(NAMED_CHAR_PART)
    if not source.match("}"):
        return literal_n()

    try:
        codepoint = ord(unicodedata.lookup(char_name))
        return make_character(info, codepoint, in_set)
    except KeyError:
        raise error("undefined character name", source.string,
          source.pos)
bre.py 文件源码 项目:backrefs 作者: facelessuser 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def unicode_name(self, name):
    """Insert Unicode value by its name.

    Code points up to 0xFF are returned as a backslash followed by three
    octal digits; anything larger is returned via ``compat.uchr``.
    """

    codepoint = ord(unicodedata.lookup(name))
    if codepoint > 0xFF:
        return compat.uchr(codepoint)
    return '\\%03o' % codepoint
pretty_symbology.py 文件源码 项目:zippy 作者: securesystemslab 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def U(name):
    """unicode character by name or None if not found

    A failed lookup also appends a one-line note to the module-level
    ``unicode_warnings`` string.
    """
    global unicode_warnings

    try:
        return unicodedata.lookup(name)
    except KeyError:
        # Record the miss; the caller receives None.
        unicode_warnings += 'No \'%s\' in unicodedata\n' % name
        return None
test_ucn.py 文件源码 项目:zippy 作者: securesystemslab 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_ascii_letters(self):
    """Round-trip every lowercase ASCII letter through lookup() and name()."""
    import unicodedata

    # range() excludes its stop value, so add one to make sure "z" is
    # covered as well.
    for codepoint in range(ord("a"), ord("z") + 1):
        char = chr(codepoint)
        name = "LATIN SMALL LETTER %s" % char.upper()
        code = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(code), name)
test_ucn.py 文件源码 项目:zippy 作者: securesystemslab 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_bmp_characters(self):
    """Every named code point below 0x10000 must look up to itself."""
    import unicodedata

    # Tally of the named code points seen (not asserted on).
    count = 0
    for char in map(chr, range(0x10000)):
        name = unicodedata.name(char, None)
        if name is None:
            # This code point has no name; nothing to look up.
            continue
        self.assertEqual(unicodedata.lookup(name), char)
        count += 1
test_ucn.py 文件源码 项目:zippy 作者: securesystemslab 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_errors(self):
    """name() and lookup() must reject bad arguments and unknown names."""
    import unicodedata

    # A missing argument or a two-character string is a TypeError.
    type_error_calls = (
        (unicodedata.name,),
        (unicodedata.name, 'xx'),
        (unicodedata.lookup,),
    )
    for call in type_error_calls:
        self.assertRaises(TypeError, *call)

    # An unknown character name is a KeyError.
    self.assertRaises(KeyError, unicodedata.lookup, 'unknown')


问题


面经


文章

微信
公众号

扫码关注公众号