def test_from_unicode_array(self):
    """Wrap a unicode ndarray with np.char.array, with and without unicode kwargs.

    The default call and the ``kw_unicode_true`` call must both reproduce the
    source array's values, dtype and shape; the ``kw_unicode_false`` call is
    expected to raise UnicodeEncodeError.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        wrapped = np.char.array(source, **extra_kwargs)
        assert_array_equal(wrapped, source)
        assert_equal(wrapped.dtype, source.dtype)
        assert_equal(wrapped.shape, source.shape)

    # Deferred via a lambda so that assertRaises performs the call itself.
    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
python类unicode_()的实例源码
def test_join(self):
    """Check np.char.join with the separators [',', '#'] applied to self.A.

    On Python 3 the fixture is decoded to text first and the result must be a
    unicode array; otherwise the fixture is used directly and the result must
    be a byte-string array.
    """
    on_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3
    text = self.A.decode('ascii') if on_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, text)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, text), expected)
def test_rstrip(self):
    """Check rstrip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default rstrip on self.A.
    bytes_stripped = self.A.rstrip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(bytes_stripped, expected)

    # rstrip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check strip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default strip on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    bytes_stripped = self.A.strip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    assert_array_equal(bytes_stripped, expected)

    # strip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Wrap a unicode ndarray with np.char.array, with and without unicode kwargs.

    The default call and the ``kw_unicode_true`` call must both reproduce the
    source array's values, dtype and shape; the ``kw_unicode_false`` call is
    expected to raise UnicodeEncodeError.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        wrapped = np.char.array(source, **extra_kwargs)
        assert_array_equal(wrapped, source)
        assert_equal(wrapped.dtype, source.dtype)
        assert_equal(wrapped.shape, source.shape)

    # Deferred via a lambda so that assertRaises performs the call itself.
    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check np.char.join with the separators [',', '#'] applied to self.A.

    On Python 3 the fixture is decoded to text first and the result must be a
    unicode array; otherwise the fixture is used directly and the result must
    be a byte-string array.
    """
    on_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3
    text = self.A.decode('ascii') if on_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, text)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, text), expected)
def test_rstrip(self):
    """Check rstrip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default rstrip on self.A.
    bytes_stripped = self.A.rstrip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(bytes_stripped, expected)

    # rstrip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check strip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default strip on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    bytes_stripped = self.A.strip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    assert_array_equal(bytes_stripped, expected)

    # strip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
test_dtypes.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_select_dtypes_str_raises(self):
    """Check that select_dtypes raises TypeError for every string dtype spelling.

    Each candidate is tried once as ``include`` and once as ``exclude``; both
    calls must raise TypeError matching 'string dtypes are not allowed'.
    """
    frame = DataFrame({'a': list('abc'),
                       'g': list(u('abc')),
                       'b': list(range(1, 4)),
                       'c': np.arange(3, 6).astype('u1'),
                       'd': np.arange(4.0, 7.0, dtype='float64'),
                       'e': [True, False, True],
                       'f': pd.date_range('now', periods=3).values})

    candidates = (str, 'str', np.string_, 'S1',
                  'unicode', np.unicode_, 'U1')
    string_dtypes = set(candidates)
    # The builtin ``unicode`` is only added when that name exists.
    try:
        string_dtypes.add(unicode)
    except NameError:
        pass

    for dt in string_dtypes:
        for selector in ('include', 'exclude'):
            with tm.assertRaisesRegexp(TypeError,
                                       'string dtypes are not allowed'):
                frame.select_dtypes(**{selector: [dt]})
test_defchararray.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 38
收藏 0
点赞 0
评论 0
def test_from_unicode_array(self):
    """Wrap a unicode ndarray with np.char.array, with and without unicode kwargs.

    The default call and the ``kw_unicode_true`` call must both reproduce the
    source array's values, dtype and shape; the ``kw_unicode_false`` call is
    expected to raise UnicodeEncodeError.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        wrapped = np.char.array(source, **extra_kwargs)
        assert_array_equal(wrapped, source)
        assert_equal(wrapped.dtype, source.dtype)
        assert_equal(wrapped.shape, source.shape)

    # Deferred via a lambda so that assertRaises performs the call itself.
    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
test_defchararray.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 42
收藏 0
点赞 0
评论 0
def test_join(self):
    """Check np.char.join with the separators [',', '#'] applied to self.A.

    On Python 3 the fixture is decoded to text first and the result must be a
    unicode array; otherwise the fixture is used directly and the result must
    be a byte-string array.
    """
    on_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3
    text = self.A.decode('ascii') if on_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, text)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, text), expected)
test_defchararray.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 49
收藏 0
点赞 0
评论 0
def test_lstrip(self):
    """Check lstrip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default lstrip on self.A.
    expected = asbytes_nested([['abc ', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345 \0 ', 'UPPER']])
    bytes_stripped = self.A.lstrip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    assert_array_equal(bytes_stripped, expected)

    # lstrip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc', ''],
                               ['2345', 'ixedCase'],
                               ['23 \t 345 \x00', 'UPPER']])
    chars = asbytes_nested(['1', 'M'])
    assert_array_equal(self.A.lstrip(chars), expected)

    # Default lstrip on self.B.
    expected = [[sixu('\u03a3 '), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345 \0 ', 'UPPER']]
    unicode_stripped = self.B.lstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
test_defchararray.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 40
收藏 0
点赞 0
评论 0
def test_rstrip(self):
    """Check rstrip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default rstrip on self.A.
    bytes_stripped = self.A.rstrip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(bytes_stripped, expected)

    # rstrip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
test_defchararray.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def test_strip(self):
    """Check strip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default strip on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    bytes_stripped = self.A.strip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    assert_array_equal(bytes_stripped, expected)

    # strip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Wrap a unicode ndarray with np.char.array, with and without unicode kwargs.

    The default call and the ``kw_unicode_true`` call must both reproduce the
    source array's values, dtype and shape; the ``kw_unicode_false`` call is
    expected to raise UnicodeEncodeError.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        wrapped = np.char.array(source, **extra_kwargs)
        assert_array_equal(wrapped, source)
        assert_equal(wrapped.dtype, source.dtype)
        assert_equal(wrapped.shape, source.shape)

    # Deferred via a lambda so that assertRaises performs the call itself.
    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check np.char.join with the separators [',', '#'] applied to self.A.

    On Python 3 the fixture is decoded to text first and the result must be a
    unicode array; otherwise the fixture is used directly and the result must
    be a byte-string array.
    """
    on_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3
    text = self.A.decode('ascii') if on_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, text)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, text), expected)
def test_rstrip(self):
    """Check rstrip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default rstrip on self.A.
    bytes_stripped = self.A.rstrip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(bytes_stripped, expected)

    # rstrip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check strip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default strip on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    bytes_stripped = self.A.strip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    assert_array_equal(bytes_stripped, expected)

    # strip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Wrap a unicode ndarray with np.char.array, with and without unicode kwargs.

    The default call and the ``kw_unicode_true`` call must both reproduce the
    source array's values, dtype and shape; the ``kw_unicode_false`` call is
    expected to raise UnicodeEncodeError.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        wrapped = np.char.array(source, **extra_kwargs)
        assert_array_equal(wrapped, source)
        assert_equal(wrapped.dtype, source.dtype)
        assert_equal(wrapped.shape, source.shape)

    # Deferred via a lambda so that assertRaises performs the call itself.
    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check np.char.join with the separators [',', '#'] applied to self.A.

    On Python 3 the fixture is decoded to text first and the result must be a
    unicode array; otherwise the fixture is used directly and the result must
    be a byte-string array.
    """
    on_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3
    text = self.A.decode('ascii') if on_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, text)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, text), expected)
def test_rstrip(self):
    """Check rstrip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default rstrip on self.A.
    bytes_stripped = self.A.rstrip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(bytes_stripped, expected)

    # rstrip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check strip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default strip on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    bytes_stripped = self.A.strip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    assert_array_equal(bytes_stripped, expected)

    # strip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Wrap a unicode ndarray with np.char.array, with and without unicode kwargs.

    The default call and the ``kw_unicode_true`` call must both reproduce the
    source array's values, dtype and shape; the ``kw_unicode_false`` call is
    expected to raise UnicodeEncodeError.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        wrapped = np.char.array(source, **extra_kwargs)
        assert_array_equal(wrapped, source)
        assert_equal(wrapped.dtype, source.dtype)
        assert_equal(wrapped.shape, source.shape)

    # Deferred via a lambda so that assertRaises performs the call itself.
    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check np.char.join with the separators [',', '#'] applied to self.A.

    On Python 3 the fixture is decoded to text first and the result must be a
    unicode array; otherwise the fixture is used directly and the result must
    be a byte-string array.
    """
    on_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3
    text = self.A.decode('ascii') if on_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, text)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, text), expected)
def test_rstrip(self):
    """Check rstrip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default rstrip on self.A.
    bytes_stripped = self.A.rstrip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(bytes_stripped, expected)

    # rstrip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check strip on self.A (default and explicit characters) and on self.B.

    The result for self.A must have a byte-string dtype and the result for
    self.B a unicode dtype.
    """
    # Default strip on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    bytes_stripped = self.A.strip()
    assert_(issubclass(bytes_stripped.dtype.type, np.string_))
    assert_array_equal(bytes_stripped, expected)

    # strip on self.A with an explicit array of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.bytes_) objects

    Unicode text is encoded to ASCII; characters ASCII cannot represent are
    replaced by XML character references (e.g. U+03A3 becomes b'&#931;').

    Args:
        a: Array whose dtype is bytes, unicode or object. An object array must
            hold only str elements or only bytes elements.

    Returns:
        An array with a bytes dtype. Input that already has a bytes dtype is
        returned unchanged (the same object, not a copy).

    Raises:
        ValueError: If an object array mixes str and bytes or holds any other
            element type, or if the dtype is not object, bytes or unicode.
    """
    # np.bytes_ / np.str_ are used instead of the np.string_ / np.unicode_
    # aliases, which no longer exist in NumPy 2.0.
    if np.issubdtype(a.dtype, np.object_):
        # isinstance (rather than an exact type check) accepts plain Python
        # str/bytes as well as np.str_/np.bytes_, which subclass them.
        if all(isinstance(x, str) for x in a):
            # The explicit dtype keeps an empty input from becoming float64.
            return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a], dtype=np.bytes_)
        if all(isinstance(x, bytes) for x in a):
            return a.astype(np.bytes_)
        raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    if np.issubdtype(a.dtype, np.bytes_):
        return a
    if np.issubdtype(a.dtype, np.str_):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a], dtype=np.bytes_)
    raise ValueError("String values must be object, ascii or unicode.")
def materialize_attr_values(a: np.ndarray) -> np.ndarray:
    """
    Convert byte-string or unicode values to unicode strings; pass other values through.

    Byte strings are decoded as ASCII (bytes outside the 7-bit range are
    dropped) and XML/HTML character references are then unescaped, e.g.
    b'&#931;' becomes the single character U+03A3.

    Args:
        a: An array, or a single scalar value. A scalar is wrapped in a
            one-element array for processing and unwrapped again on return.

    Returns:
        For bytes or unicode input, a new array with a unicode dtype; for any
        other dtype, the input array itself. If a scalar was passed in, the
        single resulting element is returned instead of an array.
    """
    scalar = np.isscalar(a)
    if scalar:
        a = np.array([a])

    # np.bytes_ / np.str_ are used instead of the np.string_ / np.unicode_
    # aliases, which no longer exist in NumPy 2.0.
    if np.issubdtype(a.dtype, np.bytes_):
        # First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
        decoded = [x.decode('ascii', 'ignore') for x in a]
        # Then unescape XML entities and convert to unicode
        result = np.array([html.unescape(s) for s in decoded], dtype=np.str_)
    elif np.issubdtype(a.dtype, np.str_):
        result = np.array(a.astype(str), dtype=np.str_)
    else:
        result = a

    return result[0] if scalar else result
def test_from_unicode_array(self):
    """Wrap a unicode ndarray with np.char.array, with and without unicode kwargs.

    The default call and the ``kw_unicode_true`` call must both reproduce the
    source array's values, dtype and shape; the ``kw_unicode_false`` call is
    expected to raise UnicodeEncodeError.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        wrapped = np.char.array(source, **extra_kwargs)
        assert_array_equal(wrapped, source)
        assert_equal(wrapped.dtype, source.dtype)
        assert_equal(wrapped.shape, source.shape)

    # Deferred via a lambda so that assertRaises performs the call itself.
    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check np.char.join with the separators [',', '#'] applied to self.A.

    On Python 3 the fixture is decoded to text first and the result must be a
    unicode array; otherwise the fixture is used directly and the result must
    be a byte-string array.
    """
    on_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3
    text = self.A.decode('ascii') if on_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, text)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, text), expected)