python类unicode_()的实例源码-面圈网

test_defchararray.py 文件源码项目：radar 作者: amoose136 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail)

test_defchararray.py 文件源码项目：radar 作者: amoose136 项目源码文件源码阅读 46 收藏 0 点赞 0 评论 0

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt)

test_defchararray.py 文件源码项目：radar 作者: amoose136 项目源码文件源码阅读 38 收藏 0 点赞 0 评论 0

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt)

test_defchararray.py 文件源码项目：radar 作者: amoose136 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt)

test_defchararray.py 文件源码项目：krpcScripts 作者: jwvanderbeck 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail)

test_defchararray.py 文件源码项目：krpcScripts 作者: jwvanderbeck 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt)

test_defchararray.py 文件源码项目：krpcScripts 作者: jwvanderbeck 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt)

test_defchararray.py 文件源码项目：krpcScripts 作者: jwvanderbeck 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt)

test_dtypes.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def test_select_dtypes_str_raises(self):
        df = DataFrame({'a': list('abc'),
                        'g': list(u('abc')),
                        'b': list(range(1, 4)),
                        'c': np.arange(3, 6).astype('u1'),
                        'd': np.arange(4.0, 7.0, dtype='float64'),
                        'e': [True, False, True],
                        'f': pd.date_range('now', periods=3).values})
        string_dtypes = set((str, 'str', np.string_, 'S1',
                             'unicode', np.unicode_, 'U1'))
        try:
            string_dtypes.add(unicode)
        except NameError:
            pass
        for dt in string_dtypes:
            with tm.assertRaisesRegexp(TypeError,
                                       'string dtypes are not allowed'):
                df.select_dtypes(include=[dt])
            with tm.assertRaisesRegexp(TypeError,
                                       'string dtypes are not allowed'):
                df.select_dtypes(exclude=[dt])

test_defchararray.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 38 收藏 0 点赞 0 评论 0

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail)

test_defchararray.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 42 收藏 0 点赞 0 评论 0

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt)

test_defchararray.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 49 收藏 0 点赞 0 评论 0

def test_lstrip(self):
        tgt = asbytes_nested([['abc ', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345 \0 ', 'UPPER']])
        assert_(issubclass(self.A.lstrip().dtype.type, np.string_))
        assert_array_equal(self.A.lstrip(), tgt)

        tgt = asbytes_nested([[' abc', ''],
                              ['2345', 'ixedCase'],
                              ['23 \t 345 \x00', 'UPPER']])
        assert_array_equal(self.A.lstrip(asbytes_nested(['1', 'M'])), tgt)

        tgt = [[sixu('\u03a3 '), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345 \0 ', 'UPPER']]
        assert_(issubclass(self.B.lstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.lstrip(), tgt)

test_defchararray.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt)

test_defchararray.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt)

test_defchararray.py 文件源码项目：aws-lambda-numpy 作者: vitolimandibhrata 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail)

test_defchararray.py 文件源码项目：aws-lambda-numpy 作者: vitolimandibhrata 项目源码文件源码阅读 46 收藏 0 点赞 0 评论 0

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt)

test_defchararray.py 文件源码项目：aws-lambda-numpy 作者: vitolimandibhrata 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt)

test_defchararray.py 文件源码项目：aws-lambda-numpy 作者: vitolimandibhrata 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt)

test_defchararray.py 文件源码项目：lambda-numba 作者: rlhotovy 项目源码文件源码阅读 41 收藏 0 点赞 0 评论 0

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail)

test_defchararray.py 文件源码项目：lambda-numba 作者: rlhotovy 项目源码文件源码阅读 38 收藏 0 点赞 0 评论 0

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt)

test_defchararray.py 文件源码项目：lambda-numba 作者: rlhotovy 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt)

test_defchararray.py 文件源码项目：lambda-numba 作者: rlhotovy 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt)

test_defchararray.py 文件源码项目：deliver 作者: orchestor 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail)

test_defchararray.py 文件源码项目：deliver 作者: orchestor 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt)

test_defchararray.py 文件源码项目：deliver 作者: orchestor 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt)

test_defchararray.py 文件源码项目：deliver 作者: orchestor 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt)

normalize.py 文件源码项目：loompy 作者: linnarsson-lab 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
    """
    if np.issubdtype(a.dtype, np.object_):
        if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
            return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
        elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
            return a.astype("string_")
        else:
            print(type(a[0]))
            raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
        return a
    elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
    else:
        raise ValueError("String values must be object, ascii or unicode.")

normalize.py 文件源码项目：loompy 作者: linnarsson-lab 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def materialize_attr_values(a: np.ndarray) -> np.ndarray:
    scalar = False
    if np.isscalar(a):
        scalar = True
        a = np.array([a])
    result: np.ndarray = None
    if np.issubdtype(a.dtype, np.string_):
        # First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
        temp = np.array([x.decode('ascii', 'ignore') for x in a])
        # Then unescape XML entities and convert to unicode
        result = np.array([html.unescape(x) for x in temp.astype(str)], dtype=np.str_)
    elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
        result = np.array(a.astype(str), dtype=np.str_)
    else:
        result = a
    if scalar:
        return result[0]
    else:
        return result

test_defchararray.py 文件源码项目：Alfred 作者: jkachhadia 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail)

test_defchararray.py 文件源码项目：Alfred 作者: jkachhadia 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt)