def test_scalar_none_comparison(self):
    # Scalars should still just return False and not give a warning.
# The comparisons are flagged by pep8, ignore that.
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings('always', '', FutureWarning)
assert_(not np.float32(1) == None)
assert_(not np.str_('test') == None)
# This is dubious (see below):
assert_(not np.datetime64('NaT') == None)
assert_(np.float32(1) != None)
assert_(np.str_('test') != None)
# This is dubious (see below):
assert_(np.datetime64('NaT') != None)
assert_(len(w) == 0)
# For documentation purposes, this is why the datetime is dubious.
# At the time of deprecation this was no behaviour change, but
# it has to be considered when the deprecations are done.
assert_(np.equal(np.datetime64('NaT'), None))
Python str_() usage examples
def test_stratified_batches():
data = np.array([('a', -1), ('b', 0), ('c', 1), ('d', -1), ('e', -1)],
dtype=[('x', np.str_, 8), ('y', np.int32)])
assert list(data['x']) == ['a', 'b', 'c', 'd', 'e']
assert list(data['y']) == [-1, 0, 1, -1, -1]
batch_generator = training_batches(data, batch_size=3, n_labeled_per_batch=1)
first_ten_batches = list(islice(batch_generator, 10))
labeled_batch_portions = [batch[:1] for batch in first_ten_batches]
unlabeled_batch_portions = [batch[1:] for batch in first_ten_batches]
labeled_epochs = np.split(np.concatenate(labeled_batch_portions), 5)
unlabeled_epochs = np.split(np.concatenate(unlabeled_batch_portions), 4)
assert ([sorted(items['x'].tolist()) for items in labeled_epochs] ==
[['b', 'c']] * 5)
assert ([sorted(items['y'].tolist()) for items in labeled_epochs] ==
[[0, 1]] * 5)
assert ([sorted(items['x'].tolist()) for items in unlabeled_epochs] ==
[['a', 'b', 'c', 'd', 'e']] * 4)
assert ([sorted(items['y'].tolist()) for items in unlabeled_epochs] ==
[[-1, -1, -1, -1, -1]] * 4)
def discrete(self, x, bin=5):
    """Discretize x into `bin` quantile intervals computed over the
    non-negative values, labelling each entry with the 'lo-hi' range of
    the interval it falls in (a preprocessing step for WOE binning)."""
    x_copy = pd.Series.copy(x)
    x_copy = x_copy.astype(str)
    x_gt0 = x[x >= 0]
    for i in range(bin):
        point1 = stats.scoreatpercentile(x_gt0, i * (100.0 / bin))
        point2 = stats.scoreatpercentile(x_gt0, (i + 1) * (100.0 / bin))
        x1 = x[(x >= point1) & (x <= point2)]
        mask = np.in1d(x, x1)
        x_copy[mask] = '%s-%s' % (point1, point2)
    return x_copy
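For reference, here is a minimal standalone sketch of the same quantile-labelling idea (the class around discrete() isn't shown above, so the helper name and sample data below are illustrative):

import pandas as pd
from scipy import stats

def quantile_labels(x, bins=5):
    # Label each value with the 'lo-hi' quantile interval containing it;
    # cut points are computed over the non-negative values only.
    labels = x.astype(str)
    x_gt0 = x[x >= 0]
    for i in range(bins):
        lo = stats.scoreatpercentile(x_gt0, i * (100.0 / bins))
        hi = stats.scoreatpercentile(x_gt0, (i + 1) * (100.0 / bins))
        labels[(x >= lo) & (x <= hi)] = '%s-%s' % (lo, hi)
    return labels

print(quantile_labels(pd.Series([1.0, 2.0, 4.0, 8.0, 16.0])).tolist())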
def grade(self, x, bin=5):
    """Discretize x into `bin` quantile intervals computed over the
    non-negative values, labelling each entry with its 1-based bin index
    (a preprocessing step for WOE binning)."""
    x_copy = np.copy(x)
    x_gt0 = x[x >= 0]
    for i in range(bin):
        point1 = stats.scoreatpercentile(x_gt0, i * (100.0 / bin))
        point2 = stats.scoreatpercentile(x_gt0, (i + 1) * (100.0 / bin))
        x1 = x[(x >= point1) & (x <= point2)]
        mask = np.in1d(x, x1)
        x_copy[mask] = i + 1
        print point1, point2  # log the cut points (Python 2 print statement)
    return x_copy
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
"""
Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
"""
if np.issubdtype(a.dtype, np.object_):
if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
return a.astype("string_")
else:
print(type(a[0]))
raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    elif np.issubdtype(a.dtype, np.string_):
return a
elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
else:
raise ValueError("String values must be object, ascii or unicode.")
def materialize_attr_values(a: np.ndarray) -> np.ndarray:
scalar = False
if np.isscalar(a):
scalar = True
a = np.array([a])
result: np.ndarray = None
if np.issubdtype(a.dtype, np.string_):
# First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
temp = np.array([x.decode('ascii', 'ignore') for x in a])
# Then unescape XML entities and convert to unicode
result = np.array([html.unescape(x) for x in temp.astype(str)], dtype=np.str_)
elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
result = np.array(a.astype(str), dtype=np.str_)
else:
result = a
if scalar:
return result[0]
else:
return result
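The two functions are designed as a round trip: normalize_attr_strings escapes non-ascii characters to XML entities on the way into storage, and materialize_attr_values unescapes them on the way out. A sketch (again assuming NumPy < 2.0 for the dtype aliases):

original = np.array(['café'], dtype=np.object_)
stored = normalize_attr_strings(original)    # array([b'caf&#233;'], dtype='|S9')
restored = materialize_attr_values(stored)   # array(['café'], dtype='<U4')
assert restored[0] == original[0]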
def npy2py_type(npy_type):
int_types = [
np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64,
np.uint8, np.uint16, np.uint32, np.uint64
]
float_types = [np.float_, np.float16, np.float32, np.float64]
bytes_types = [np.str_, np.string_]
if npy_type in int_types:
return int
if npy_type in float_types:
return float
if npy_type in bytes_types:
return bytes
if hasattr(npy_type, 'char'):
if npy_type.char in ['S', 'a']:
return bytes
raise TypeError
return npy_type
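Illustrative calls (assuming NumPy < 2.0, where the np.float_ and np.string_ aliases used above still exist; note the mapping sends string types to bytes, a Python 2 convention):

import numpy as np

print(npy2py_type(np.int32))             # <class 'int'>
print(npy2py_type(np.float64))           # <class 'float'>
print(npy2py_type(np.dtype('S8').type))  # <class 'bytes'>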
def initialize(self):
"""Initialize FixPandasDataFrame"""
self.check_arg_types(read_key=str, store_key=str)
self.check_arg_types(recurse=True, allow_none=True, original_columns=str)
self.check_arg_vals('read_key')
if not isinstance(self.cleanup_string_columns, list) and not isinstance(self.cleanup_string_columns, bool):
raise AssertionError('cleanup_string_columns should be a list of column names or boolean.')
if self.read_key == self.store_key:
self.inplace = True
self.log().debug('store_key equals read_key; inplace has been set to "True"')
if self.inplace:
self.store_key = self.read_key
self.log().debug('store_key has been set to read_key "%s"', self.store_key)
if not self.store_key:
self.store_key = self.read_key + '_fix'
self.log().debug('store_key has been set to "%s"', self.store_key)
# check data types
for k in self.var_dtype.keys():
if k not in self.contaminated_columns:
self.contaminated_columns.append(k)
try:
# convert to consistent types
dt = np.dtype(self.var_dtype[k]).type
if dt is np.str_ or dt is np.object_:
dt = str
self.var_dtype[k] = dt
except BaseException:
raise TypeError('unknown assigned datatype to variable "%s"' % k)
return StatusCode.Success
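The dtype-normalization step above collapses any string or object dtype to the plain Python str type; a standalone illustration of just that rule:

import numpy as np

for spec in ['str', 'object', 'int64']:
    dt = np.dtype(spec).type
    if dt is np.str_ or dt is np.object_:
        dt = str
    print(spec, '->', dt)  # 'str' and 'object' both map to <class 'str'>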
def test_object_array_to_fixed_string(self):
# Ticket #1235.
a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
b = np.array(a, dtype=(np.str_, 8))
assert_equal(a, b)
c = np.array(a, dtype=(np.str_, 5))
assert_equal(c, np.array(['abcde', 'ijklm']))
d = np.array(a, dtype=(np.str_, 12))
assert_equal(a, d)
e = np.empty((2, ), dtype=(np.str_, 8))
e[:] = a[:]
assert_equal(a, e)
def test_string(self):
lr = LogisticRegression()
for col in ['features', u'features', np.str_('features')]:
lr.setFeaturesCol(col)
self.assertEqual(lr.getFeaturesCol(), 'features')
self.assertRaises(TypeError, lambda: LogisticRegression(featuresCol=2.3))
def _can_convert_to_string(value):
vtype = type(value)
return isinstance(value, basestring) or vtype in [np.unicode_, np.string_, np.str_]
def toString(value):
"""
Convert a value to a string, if possible.
"""
if isinstance(value, basestring):
return value
elif type(value) in [np.string_, np.str_]:
return str(value)
elif type(value) == np.unicode_:
return unicode(value)
else:
raise TypeError("Could not convert %s to string type" % type(value))
def symbols_to_numbers(symbols):
"""Given element symbol(s), return the atomic number(s) (number of protons).
Args:
symbols (str or list of str): Atomic symbol(s).
Returns:
ndarray: Atomic number(s) (number of protons).
Raises:
ValueError: If a given atomic symbol is invalid and doesn't have a
corresponding number.
"""
single_value = False
if isinstance(symbols, (str, np.str_)):
symbols = [symbols]
single_value = True
numbers = []
for symbol in symbols:
number = SYMBOL_TO_NUMBER_MAP.get(symbol)
if number is None:
raise ValueError(
"Given atomic symbol {} is invalid and doesn't have a number "
"associated with it.".format(symbol)
)
numbers.append(number)
return numbers[0] if single_value else np.array(numbers)
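Usage sketch (SYMBOL_TO_NUMBER_MAP is defined elsewhere in the module; the expected values simply follow the periodic table):

print(symbols_to_numbers('He'))        # 2
print(symbols_to_numbers(['H', 'O']))  # array([1, 8])
symbols_to_numbers('Xx')               # raises ValueError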
def init_list(self):
    if self.fname == '' or not os.path.isfile(self.fname):
sys.stderr.write('Initializing empty class list\n')
self.clist = np.zeros((self.num_frames,), dtype=np.str_)
else:
self.clist = self.load()
self.key, self.key_pos, self.key_counts = np.unique(self.clist, return_inverse=True, return_counts=True)
def main():
net = caffe.Net(MODEL_DEF, MODEL_WEIGHT, caffe.TRAIN)
mat = []
for i in range(len(net.layers)):
mat_type = net.layers[i].type
mat_data = []
for j in range(len(net.layers[i].blobs)):
mat_data.append(net.layers[i].blobs[j].data)
mat.append((mat_type, mat_data))
dt = np.dtype([('type', np.str_, 16), ('data', np.ndarray)])
results = np.array(mat, dtype=dt)
results.dump(MAT_RESULT)
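The structured dtype above pairs a 16-character string field with an object field (NumPy stores np.ndarray-typed fields as generic objects), so each row can carry a layer type plus arbitrarily shaped weight blobs. A caffe-free illustration:

import numpy as np

dt = np.dtype([('type', np.str_, 16), ('data', np.ndarray)])
rec = np.zeros(1, dtype=dt)
rec[0] = ('Convolution', [np.zeros((2, 3))])
print(rec['type'][0], rec['data'][0][0].shape)  # Convolution (2, 3)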
def encode_ascii(s):
if isinstance(s, str):
return s.encode('ascii')
elif isinstance(s, numpy.ndarray) and \
issubclass(s.dtype.type, numpy.str_):
ns = numpy.char.encode(s, 'ascii').view(type(s))
if ns.dtype.itemsize != s.dtype.itemsize / 4:
ns = ns.astype((numpy.bytes_, s.dtype.itemsize / 4))
return ns
return s
def decode_ascii(s):
if isinstance(s, bytes):
return s.decode('ascii')
elif (isinstance(s, numpy.ndarray) and
issubclass(s.dtype.type, numpy.bytes_)):
# np.char.encode/decode annoyingly don't preserve the type of the
# array, hence the view() call
# It also doesn't necessarily preserve widths of the strings,
# hence the astype()
ns = numpy.char.decode(s, 'ascii').view(type(s))
if ns.dtype.itemsize / 4 != s.dtype.itemsize:
ns = ns.astype((numpy.str_, s.dtype.itemsize))
return ns
return s
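encode_ascii and decode_ascii are inverses; the itemsize arithmetic assumes NumPy's 4-bytes-per-character unicode representation (standard builds). A round-trip sketch:

import numpy

s = numpy.array(['abc', 'defg'])        # dtype '<U4'
b = encode_ascii(s)                     # dtype 'S4'
assert decode_ascii(b).tolist() == s.tolist()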
def regroup(df, column, split_points):
    # Compute the bin masks against the original numeric values up front,
    # so relabelling one bin can't break the comparisons for the next.
    values = df[column].copy()
    for i in range(len(split_points) - 1):
        mask = (values >= split_points[i]) & (values <= split_points[i + 1])
        df.loc[mask, column] = '%s-%s' % (split_points[i], split_points[i + 1])
    df[column] = df[column].astype(np.str_)
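Hypothetical usage, relabelling a numeric column into two bins:

import pandas as pd

df = pd.DataFrame({'amount': [1.0, 5.0, 9.0]})
regroup(df, 'amount', [0, 4, 10])
print(df['amount'].tolist())  # ['0-4', '4-10', '4-10']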
test_dtypes.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_astype_str(self):
# GH4405
digits = string.digits
s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)])
s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0])
types = (compat.text_type, np.str_)
for typ in types:
for s in (s1, s2):
res = s.astype(typ)
expec = s.map(compat.text_type)
assert_series_equal(res, expec)
# GH9757
# Test str and unicode on python 2.x and just str on python 3.x
for tt in set([str, compat.text_type]):
ts = Series([Timestamp('2010-01-04 00:00:00')])
s = ts.astype(tt)
expected = Series([tt('2010-01-04')])
assert_series_equal(s, expected)
ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')])
s = ts.astype(tt)
expected = Series([tt('2010-01-04 00:00:00-05:00')])
assert_series_equal(s, expected)
td = Series([Timedelta(1, unit='d')])
s = td.astype(tt)
expected = Series([tt('1 days 00:00:00.000000000')])
assert_series_equal(s, expected)
test_constructors.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_constructor_empty_with_string_dtype(self):
# GH 9428
expected = DataFrame(index=[0, 1], columns=[0, 1], dtype=object)
df = DataFrame(index=[0, 1], columns=[0, 1], dtype=str)
assert_frame_equal(df, expected)
df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.str_)
assert_frame_equal(df, expected)
df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.unicode_)
assert_frame_equal(df, expected)
df = DataFrame(index=[0, 1], columns=[0, 1], dtype='U5')
assert_frame_equal(df, expected)
test_dtypes.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_numpy_informed(self):
# np.dtype doesn't know about our new dtype
def f():
np.dtype(self.dtype)
self.assertRaises(TypeError, f)
self.assertNotEqual(self.dtype, np.str_)
self.assertNotEqual(np.str_, self.dtype)
test_lib.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_isscalar_numpy_array_scalars(self):
self.assertTrue(lib.isscalar(np.int64(1)))
self.assertTrue(lib.isscalar(np.float64(1.)))
self.assertTrue(lib.isscalar(np.int32(1)))
self.assertTrue(lib.isscalar(np.object_('foobar')))
self.assertTrue(lib.isscalar(np.str_('foobar')))
self.assertTrue(lib.isscalar(np.unicode_(u('foobar'))))
self.assertTrue(lib.isscalar(np.bytes_(b'foobar')))
self.assertTrue(lib.isscalar(np.datetime64('2014-01-01')))
self.assertTrue(lib.isscalar(np.timedelta64(1, 'h')))