def setUp(self):
    """Load the traveled-speeds fixture and fit a fresh classifier on it.

    Stores the fixture as ``self.dataset`` and the fitted classifier as
    ``self.subject``.
    """
    # ``np.str`` was only an alias of the builtin ``str``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    self.dataset = pd.read_csv(
        'rosie/chamber_of_deputies/tests/fixtures/traveled_speeds_classifier.csv',
        dtype={'recipient_id': str})
    self.subject = TraveledSpeedsClassifier()
    self.subject.fit(self.dataset)
# Example source code using Python's str() class
def get_companies(self):
    """Read the companies dataset and normalise its key columns.

    The CSV is located at ``self.path`` joined with
    ``self.COMPANIES_DATASET``.

    :return: DataFrame whose ``cnpj`` column has every non-digit character
        removed and whose ``situation_date`` column is parsed to datetimes
        (unparseable values become ``NaT``).
    """
    path = os.path.join(self.path, self.COMPANIES_DATASET)
    # Builtin ``str`` replaces the removed ``np.str`` alias; reading the
    # column as text keeps leading zeros intact.
    dataset = pd.read_csv(path, dtype={'cnpj': str}, low_memory=False)
    # ``regex=True`` must be explicit: since pandas 2.0 the default is a
    # literal replacement, which would leave the punctuation in place.
    dataset['cnpj'] = dataset['cnpj'].str.replace(r'\D', '', regex=True)
    dataset['situation_date'] = pd.to_datetime(
        dataset['situation_date'], errors='coerce')
    return dataset
def load_test_data(ticker='000001'):
    '''
    Load test data for development.

    Reads ``<BASE_DIR>/tests/test_data/<ticker>.csv`` using the first column
    as the index.

    :param ticker: ticker code, used as the CSV file name (without extension)
    :return: ticker tradeDate turnoverVol closePrice highestPrice lowestPrice openPrice
    '''
    # Builtin ``str`` replaces ``np.str`` (alias removed in NumPy 1.24); the
    # ticker column is read as text so codes such as '000001' keep their
    # leading zeros.
    return pd.read_csv(BASE_DIR + '/tests/test_data/' + ticker + '.csv',
                       dtype={"ticker": str}, index_col=0)
def load_mesh(filename):
    """
    Rebuild a mesh object from a JSON file on disk.

    The file content is parsed as JSON and handed to
    ``BaseMesh.deserialize`` with ``trusted=True``. Per the original
    documentation, as long as there are no namespace conflicts the target
    ``__class__`` is looked up on the ``properties.HasProperties`` registry.

    :param str filename: name of file to read in
    :return: the object produced by ``BaseMesh.deserialize``
    """
    with open(filename, 'r') as json_file:
        serialized = json.load(json_file)
    return BaseMesh.deserialize(serialized, trusted=True)
def _readUBC_3DMesh(TensorMesh, fileName):
    """Read UBC GIF 3D tensor mesh and generate same dimension TensorMesh.

    :param TensorMesh: class (or callable) invoked as
        ``TensorMesh([h1, h2, h3], x0=x0)`` to build the result
    :param string fileName: path to the UBC GIF mesh file
    :rtype: TensorMesh
    :return: The tensor mesh for the fileName.
    """

    # Internal function to read cell size lines for the UBC mesh files.
    def readCellLine(line):
        line_list = []
        for seg in line.split():
            if '*' in seg:
                # "n*size" expands to n consecutive cells of width ``size``
                sp = seg.split('*')
                seg_arr = np.ones((int(sp[0]),)) * float(sp[1])
            else:
                seg_arr = np.array([float(seg)], float)
            line_list.append(seg_arr)
        return np.concatenate(line_list)

    # Read the file as line strings, remove lines with comment = !
    # (builtin ``str`` replaces ``np.str``, an alias removed in NumPy 1.24)
    msh = np.genfromtxt(fileName, delimiter='\n', dtype=str, comments='!')
    # First line is the size of the model; it is parsed here (so a
    # non-numeric header still raises) but not used further.
    sizeM = np.array(msh[0].split(), dtype=float)
    # Second line is the South-West-Top corner coordinates.
    x0 = np.array(msh[1].split(), dtype=float)
    # Read the cell sizes
    h1 = readCellLine(msh[2])
    h2 = readCellLine(msh[3])
    h3temp = readCellLine(msh[4])
    # Invert the indexing of the vector to start from the bottom.
    h3 = h3temp[::-1]
    # Adjust the reference point to the bottom south west corner
    x0[2] = x0[2] - np.sum(h3)
    # Make the mesh
    tensMsh = TensorMesh([h1, h2, h3], x0=x0)
    return tensMsh
def readUBC(TensorMesh, fileName, meshdim=None):
    """Wrapper to Read UBC GIF 2D and 3D tensor mesh and generate same dimension TensorMesh.

    :param TensorMesh: object providing ``_readUBC_2DMesh`` and
        ``_readUBC_3DMesh``, each called with ``fileName`` only
    :param string fileName: path to the UBC GIF mesh file
    :param int meshdim: expected dimension of the mesh, if unknown the default argument is None
    :rtype: TensorMesh
    :return: The tensor mesh for the fileName.
    :raises Exception: if ``meshdim`` is None and the first line of the file
        holds neither one nor three values
    :raises ValueError: if ``meshdim`` is given but is neither 2 nor 3
    """
    # Check the expected mesh dimensions
    if meshdim is None:
        # Read the file as line strings, remove lines with comment = !
        # (builtin ``str`` replaces ``np.str``, removed in NumPy 1.24)
        msh = np.genfromtxt(fileName, delimiter='\n', dtype=str,
                            comments='!', max_rows=1)
        # First line is the size of the model
        sizeM = np.array(msh.ravel()[0].split(), dtype=float)
        # Check if the mesh is a UBC 2D mesh
        if sizeM.shape[0] == 1:
            Tnsmsh = TensorMesh._readUBC_2DMesh(fileName)
        # Check if the mesh is a UBC 3D mesh
        elif sizeM.shape[0] == 3:
            Tnsmsh = TensorMesh._readUBC_3DMesh(fileName)
        else:
            raise Exception('File format not recognized')
    # expected dimension is 2
    elif meshdim == 2:
        Tnsmsh = TensorMesh._readUBC_2DMesh(fileName)
    # expected dimension is 3
    elif meshdim == 3:
        Tnsmsh = TensorMesh._readUBC_3DMesh(fileName)
    else:
        # Previously this fell through to ``return Tnsmsh`` and surfaced as
        # an UnboundLocalError; fail with an explicit message instead.
        raise ValueError(
            'meshdim must be None, 2 or 3, got {0!r}'.format(meshdim))
    return Tnsmsh
def writeUBC(mesh, fileName, models=None):
    """Writes a TensorMesh to a UBC-GIF format mesh file.

    The file holds, in order: the cell counts (``mesh.vnC``), the top
    origin (``mesh.x0`` shifted up by the sum of ``mesh.hz``), then the
    ``hx``, ``hy`` and reversed ``hz`` cell widths, one line each.

    :param mesh: 3D mesh to write (``mesh.dim`` must be 3)
    :param string fileName: File to write to
    :param dict models: A dictionary of the models; each key is a file name
        and each value is passed to ``mesh.writeModelUBC(key, value)``
    """
    assert mesh.dim == 3
    s = ''
    s += '{0:d} {1:d} {2:d}\n'.format(*tuple(mesh.vnC))
    # Build a new array rather than modifying mesh.x0 in place. Convert by
    # value: assigning ``origin.dtype = float`` would reinterpret the raw
    # bytes of an integer array instead of converting its values.
    origin = np.asarray(mesh.x0 + np.array([0, 0, mesh.hz.sum()]),
                        dtype=float)
    s += '{0:.6f} {1:.6f} {2:.6f}\n'.format(*tuple(origin))
    s += ('%.6f '*mesh.nCx+'\n') % tuple(mesh.hx)
    s += ('%.6f '*mesh.nCy+'\n') % tuple(mesh.hy)
    # z cell widths are written in reverse order
    s += ('%.6f '*mesh.nCz+'\n') % tuple(mesh.hz[::-1])
    # Context manager guarantees the handle is closed even if write fails
    with open(fileName, 'w') as f:
        f.write(s)

    if models is None:
        return
    assert isinstance(models, dict), 'models must be a dict'
    for key, model in models.items():
        assert isinstance(key, str), 'The dict key is a file name'
        mesh.writeModelUBC(key, model)
def test_pickle_py2_bytes_encoding(self):
# Check that arrays and scalars pickled on Py2 are
# unpickleable on Py3 using encoding='bytes'
test_data = [
# (original, py2_pickle)
(np.unicode_('\u6f2c'),
asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
"(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\n"
"I0\ntp6\nbS',o\\x00\\x00'\np7\ntp8\nRp9\n.")),
(np.array([9e123], dtype=np.float64),
asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\n"
"p1\n(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\n"
"p7\n(S'f8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'<'\np11\nNNNI-1\nI-1\n"
"I0\ntp12\nbI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np13\ntp14\nb.")),
(np.array([(9e123,)], dtype=[('name', float)]),
asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n"
"(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n"
"(S'V8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nN(S'name'\np12\ntp13\n"
"(dp14\ng12\n(g7\n(S'f8'\np15\nI0\nI1\ntp16\nRp17\n(I3\nS'<'\np18\nNNNI-1\n"
"I-1\nI0\ntp19\nbI0\ntp20\nsI8\nI1\nI0\ntp21\n"
"bI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np22\ntp23\nb.")),
]
if sys.version_info[:2] >= (3, 4):
# encoding='bytes' was added in Py3.4
for original, data in test_data:
result = pickle.loads(data, encoding='bytes')
assert_equal(result, original)
if isinstance(result, np.ndarray) and result.dtype.names:
for name in result.dtype.names:
assert_(isinstance(name, str))
def test_mem_on_invalid_dtype(self):
"Ticket #583"
self.assertRaises(ValueError, np.fromiter, [['12', ''], ['13', '']], str)
def test_sign_bit(self, level=rlevel):
    """The absolute value of an array holding -0.0 must print without a sign."""
    values = np.array([0, -0.0, 0])
    rendered = str(np.abs(values))
    assert_equal(rendered, '[ 0. 0. 0.]')
def test_unaligned_unicode_access(self, level=rlevel):
    """Ticket #825: printing a record with a unicode field at byte offsets 1..8."""
    # The expected repr differs between Python 2 and Python 3.
    if sys.version_info[0] >= 3:
        expected = "[(b'a', 'b')]"
    else:
        expected = "[('a', u'b')]"
    for offset in range(1, 9):
        # An 'S<offset>' field in front shifts the 'U2' field by that many bytes.
        record_type = np.dtype([('a', 'S%d' % offset), ('b', 'U2')])
        record = np.array([(asbytes('a'), sixu('b'))], dtype=record_type)
        assert_equal(str(record), expected,
                     err_msg='unicode offset: %d chars' % offset)
def test_zeros(self):
# Regression test for #1061.
# Set a size which cannot fit into a 64 bits signed integer
sz = 2 ** 64
good = 'Maximum allowed dimension exceeded'
try:
np.empty(sz)
except ValueError as e:
if not str(e) == good:
self.fail("Got msg '%s', expected '%s'" % (e, good))
except Exception as e:
self.fail("Got exception of type %s instead of ValueError" % type(e))
def test_eq_string_and_object_array(self):
# From e-mail thread "__eq__ with str and object" (Keith Goodman)
a1 = np.array(['a', 'b'], dtype=object)
a2 = np.array(['a', 'c'])
assert_array_equal(a1 == a2, [True, False])
assert_array_equal(a2 == a1, [True, False])
def test_refcount_error_in_clip(self):
# Ticket #1588
a = np.zeros((2,), dtype='>i2').clip(min=0)
x = a + a
# This used to segfault:
y = str(x)
# Check the final string:
assert_(y == "[0 0]")
def test_format_on_flex_array_element(self):
# Ticket #4369.
dt = np.dtype([('date', '<M8[D]'), ('val', '<f8')])
arr = np.array([('2000-01-01', 1)], dt)
formatted = '{0}'.format(arr[0])
assert_equal(formatted, str(arr[0]))
def test_run(self):
"""Only test hash runs at all."""
for t in [np.int, np.float, np.complex, np.int32, np.str, np.object,
np.unicode]:
dt = np.dtype(t)
hash(dt)
def test_dtypeattr(self):
assert_equal(self.one.dtype, np.dtype(np.int_))
assert_equal(self.three.dtype, np.dtype(np.float_))
assert_equal(self.one.dtype.char, 'l')
assert_equal(self.three.dtype.char, 'd')
self.assertTrue(self.three.dtype.str[0] in '<>')
assert_equal(self.one.dtype.str[1], 'i')
assert_equal(self.three.dtype.str[1], 'f')
def test_empty_subscript(self):
a, b = self.d
self.assertEqual(a[()], 0)
self.assertEqual(b[()], 'x')
self.assertTrue(type(a[()]) is a.dtype.type)
self.assertTrue(type(b[()]) is str)
def test_empty_unicode(self):
# don't throw decode errors on garbage memory
for i in range(5, 100, 5):
d = np.empty(i, dtype='U')
str(d)
def test_swapaxes(self):
a = np.arange(1*2*3*4).reshape(1, 2, 3, 4).copy()
idx = np.indices(a.shape)
assert_(a.flags['OWNDATA'])
b = a.copy()
# check exceptions
assert_raises(ValueError, a.swapaxes, -5, 0)
assert_raises(ValueError, a.swapaxes, 4, 0)
assert_raises(ValueError, a.swapaxes, 0, -5)
assert_raises(ValueError, a.swapaxes, 0, 4)
for i in range(-4, 4):
for j in range(-4, 4):
for k, src in enumerate((a, b)):
c = src.swapaxes(i, j)
# check shape
shape = list(src.shape)
shape[i] = src.shape[j]
shape[j] = src.shape[i]
assert_equal(c.shape, shape, str((i, j, k)))
# check array contents
i0, i1, i2, i3 = [dim-1 for dim in c.shape]
j0, j1, j2, j3 = [dim-1 for dim in src.shape]
assert_equal(src[idx[j0], idx[j1], idx[j2], idx[j3]],
c[idx[i0], idx[i1], idx[i2], idx[i3]],
str((i, j, k)))
# check a view is always returned, gh-5260
assert_(not c.flags['OWNDATA'], str((i, j, k)))
# check on non-contiguous input array
if k == 1:
b = c