def test_dtype_mix(self):
c = np.array([False, True, False, False, False, False, True, False,
False, False, True, False])
a = np.uint32(1)
b = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.],
dtype=np.float64)
r = np.array([5., 1., 3., 2., -1., -4., 1., -10., 10., 1., 1., 3.],
dtype=np.float64)
assert_equal(np.where(c, a, b), r)
a = a.astype(np.float32)
b = b.astype(np.int64)
assert_equal(np.where(c, a, b), r)
# non bool mask
c = c.astype(np.int)
c[c != 0] = 34242324
assert_equal(np.where(c, a, b), r)
# invert
tmpmask = c != 0
c[c == 0] = 41247212
c[tmpmask] = 0
assert_equal(np.where(c, b, a), r)
python类int64()的实例源码
def test_einsum_misc(self):
# This call used to crash because of a bug in
# PyArray_AssignZero
a = np.ones((1, 2))
b = np.ones((2, 2, 1))
assert_equal(np.einsum('ij...,j...->i...', a, b), [[[2], [2]]])
# The iterator had an issue with buffering this reduction
a = np.ones((5, 12, 4, 2, 3), np.int64)
b = np.ones((5, 12, 11), np.int64)
assert_equal(np.einsum('ijklm,ijn,ijn->', a, b, b),
np.einsum('ijklm,ijn->', a, b))
# Issue #2027, was a problem in the contiguous 3-argument
# inner loop implementation
a = np.arange(1, 3)
b = np.arange(1, 5).reshape(2, 2)
c = np.arange(1, 9).reshape(4, 2)
assert_equal(np.einsum('x,yx,zx->xzy', a, b, c),
[[[1, 3], [3, 9], [5, 15], [7, 21]],
[[8, 16], [16, 32], [24, 48], [32, 64]]])
def test_datetime_y2038(self):
# Test parsing on either side of the Y2038 boundary
a = np.datetime64('2038-01-19T03:14:07')
assert_equal(a.view(np.int64), 2**31 - 1)
a = np.datetime64('2038-01-19T03:14:08')
assert_equal(a.view(np.int64), 2**31)
# Test parsing on either side of the Y2038 boundary with
# a manually specified timezone offset
with assert_warns(DeprecationWarning):
a = np.datetime64('2038-01-19T04:14:07+0100')
assert_equal(a.view(np.int64), 2**31 - 1)
with assert_warns(DeprecationWarning):
a = np.datetime64('2038-01-19T04:14:08+0100')
assert_equal(a.view(np.int64), 2**31)
# Test parsing a date after Y2038
a = np.datetime64('2038-01-20T13:21:14')
assert_equal(str(a), '2038-01-20T13:21:14')
def _write_matrix(path, matrix):
    """Write 2-D *matrix* to *path*: one row per line, ``%f`` values separated by spaces."""
    with open(path, 'w') as fid:
        for row in matrix:
            fid.write(' '.join('%f' % value for value in row))
            fid.write('\n')


def main():
    """Fit an LDA model on a pickled matrix and dump its beta/theta tables.

    Command-line arguments:
        -i/--input:  path of the pickle file to load; the loaded object
                     must provide ``toarray()``.
        -o/--output: directory that receives ``init.beta`` (from
                     ``model.topic_word_``) and ``init.theta`` (from
                     ``model.doc_topic_``).
    """
    parser = argparse.ArgumentParser(description="Generate the beta abd theta files after latent Dirichlet allocation (LDA) process.")
    parser.add_argument('-i', '--input', required=True, help="The input file where each line starts with the number of word as well as the sparse representation of word distribution")
    parser.add_argument('-o', '--output', required=True, help="The output path")
    args = parser.parse_args()

    # Pickle data is binary: text mode fails on Python 3, and the context
    # manager guarantees the handle is closed.
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # with input files from a trusted source.
    with open(args.input, 'rb') as fin:
        tfidf = pickle.load(fin)

    feat = tfidf.toarray().astype(np.int64)
    model = lda.LDA(n_topics=50, n_iter=1500, random_state=2017)
    model.fit(feat)

    _write_matrix(os.path.join(args.output, 'init.beta'), model.topic_word_)
    _write_matrix(os.path.join(args.output, 'init.theta'), model.doc_topic_)
def test_sparse_dot(self):
    """Check that ``K.dot`` agrees for a sparse matrix and its dense copy."""
    values = np.array([0, 7, 2, 3], dtype=np.float32)
    row_idx = np.array([0, 2, 2, 3], dtype=np.int64)
    col_idx = np.array([4, 3, 2, 3], dtype=np.int64)
    sparse_x = sparse.csr_matrix((values, (row_idx, col_idx)), shape=(4, 5))
    dense_x = sparse_x.toarray()

    weights = np.random.random((5, 4))

    # Theano has some dependency issues for sparse, so it only takes part
    # when its sparse module is flagged as available.
    backends = [KTF] + ([KTH] if KTH.th_sparse_module else [])

    for backend in backends:
        weight_var = backend.variable(weights)
        from_sparse = backend.eval(
            backend.dot(backend.variable(sparse_x), weight_var))
        from_dense = backend.eval(
            backend.dot(backend.variable(dense_x), weight_var))

        assert from_sparse.shape == from_dense.shape
        assert_allclose(from_sparse, from_dense, atol=1e-05)
def test_dtype2(self, dtype):
    """Check ``random.randint`` for a given integer dtype.

    Verifies the dtype of the result, that all samples lie inside the
    representable range of ``dtype``, and that bounds outside that range
    raise ``ValueError``. 64-bit integer dtypes are skipped.
    """
    dtype = numpy.dtype(dtype)

    # randint does not support 64 bit integers
    if dtype in (numpy.int64, numpy.uint64):
        return

    limits = numpy.iinfo(dtype)
    lowest, highest = limits.min, limits.max
    shape = (10000,)

    sample = random.randint(lowest, highest + 1, shape, dtype)
    self.assertEqual(sample.dtype, dtype)
    self.assertLessEqual(lowest, min(sample))
    self.assertLessEqual(max(sample), highest)

    # Lower bound check
    with self.assertRaises(ValueError):
        random.randint(lowest - 1, lowest + 10, shape, dtype)

    # Upper bound check
    with self.assertRaises(ValueError):
        random.randint(highest - 10, highest + 2, shape, dtype)
def test_load_columnar_pandas_all(self, con, all_types_table):
    """Load a two-row frame with one column per type via ``load_table_columnar``.

    Skipped when pandas is not importable.
    """
    pd = pytest.importorskip("pandas")
    import numpy as np

    # Explicit column order for the frame.
    column_order = ['boolean_', 'smallint_', 'int_', 'bigint_', 'float_',
                    'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
                    'date_']
    column_values = {
        "boolean_": [True, False],
        "smallint_": np.array([0, 1], dtype=np.int8),
        "int_": np.array([0, 1], dtype=np.int32),
        "bigint_": np.array([0, 1], dtype=np.int64),
        "float_": np.array([0, 1], dtype=np.float32),
        "double_": np.array([0, 1], dtype=np.float64),
        "varchar_": ["a", "b"],
        "text_": ['a', 'b'],
        "time_": [datetime.time(0, 11, 59), datetime.time(13)],
        "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
        "date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
    }
    frame = pd.DataFrame(column_values, columns=column_order)

    con.load_table_columnar(all_types_table, frame, preserve_index=False)
def test_load_table_creates(self, con, not_a_table):
    """Call ``load_table`` with ``create=True`` on a two-row, multi-type frame.

    Skipped when pandas is not importable.
    """
    pd = pytest.importorskip("pandas")
    import numpy as np

    # NOTE(review): "smallint_cast" is supplied in the data below but is not
    # listed here, so it presumably does not end up in the frame - confirm
    # whether that is intended.
    column_order = ['boolean_', 'smallint_', 'int_', 'bigint_', 'float_',
                    'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
                    'date_']
    column_values = {
        "boolean_": [True, False],
        "smallint_cast": np.array([0, 1], dtype=np.int8),
        "smallint_": np.array([0, 1], dtype=np.int16),
        "int_": np.array([0, 1], dtype=np.int32),
        "bigint_": np.array([0, 1], dtype=np.int64),
        "float_": np.array([0, 1], dtype=np.float32),
        "double_": np.array([0, 1], dtype=np.float64),
        "varchar_": ["a", "b"],
        "text_": ['a', 'b'],
        "time_": [datetime.time(0, 11, 59), datetime.time(13)],
        "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
        "date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
    }
    frame = pd.DataFrame(column_values, columns=column_order)

    con.load_table(not_a_table, frame, create=True)
def test_pca_int_dtype_upcast_to_double(svd_solver):
    """Ensure that all int types will be upcast to float64 by ``dd.PCA``."""
    raw = np.random.RandomState(0).randint(0, 1000, (1000, 4))
    ints_64 = raw.astype(np.int64)
    ints_32 = ints_64.astype(np.int32)

    # Each input becomes a single-chunk array (chunks equal to its shape).
    arrays = [da.from_array(block, chunks=block.shape)
              for block in (ints_64, ints_32)]
    models = [dd.PCA(n_components=3, svd_solver=svd_solver,
                     random_state=0).fit(array)
              for array in arrays]

    for model in models:
        assert model.components_.dtype == np.float64
    for model, array in zip(models, arrays):
        assert model.transform(array).dtype == np.float64

    model_64, model_32 = models
    assert_array_almost_equal(model_64.components_, model_32.components_,
                              decimal=5)
def pairwise_distances_argmin_min(X, Y, axis=1, metric="euclidean",
                                  batch_size=None,
                                  metric_kwargs=None):
    """Apply ``metrics.pairwise_distances_argmin_min`` to each block of ``X``.

    Every delayed block of ``X`` is compared against the whole of ``Y``; the
    per-block results are concatenated into two arrays.

    :param X: array exposing ``chunks`` and ``to_delayed()``.
    :param Y: passed unchanged to every per-block call.
    :param axis: accepted but not used by this function.
    :param metric: forwarded to the per-block call.
    :param batch_size: forwarded to the per-block call; defaults to the
        largest chunk length along the first axis of ``X``.
    :param metric_kwargs: forwarded to the per-block call.
    :return: ``(argmins, mins)`` - concatenated int64 and float64 arrays.
    """
    row_chunks = X.chunks[0]
    if batch_size is None:
        batch_size = max(row_chunks)

    per_block = delayed(metrics.pairwise_distances_argmin_min,
                        pure=True, nout=2)
    results = []
    for piece in X.to_delayed().flatten().tolist():
        results.append(per_block(piece, Y, metric=metric,
                                 batch_size=batch_size,
                                 metric_kwargs=metric_kwargs))

    # Transpose the list of (argmin, min) pairs into two parallel sequences.
    argmin_parts, min_parts = zip(*results)

    argmin_arrays = []
    for part, length in zip(argmin_parts, row_chunks):
        argmin_arrays.append(da.from_delayed(part, (length,), np.int64))

    # Scikit-learn seems to always use float64
    min_arrays = []
    for part, length in zip(min_parts, row_chunks):
        min_arrays.append(da.from_delayed(part, (length,), 'f8'))

    return da.concatenate(argmin_arrays), da.concatenate(min_arrays)
def predict(dbpath, features, sess, y):
    """Evaluate ``y`` for every row of the database at ``dbpath``.

    :param dbpath: passed to ``connect`` to open the database.
    :param features: mapping with ``'numbers'`` and ``'positions'`` entries
        used as feed-dict keys.
    :param sess: object whose ``run(y, feed_dict=...)`` yields a prediction.
    :param y: the quantity handed to ``sess.run``.
    :return: ``(U0, U0_pred)`` - the stored ``'U0'`` values and the
        corresponding ``sess.run`` results, in row order.
    """
    reference = []
    predicted = []

    with connect(dbpath) as conn:
        n_structures = conn.count()
        for count, row in enumerate(conn.select()):
            reference.append(row['U0'])

            atoms = row.toatoms()
            numbers = np.array(atoms.numbers).astype(np.int64)
            positions = np.array(atoms.positions).astype(np.float32)
            feed_dict = {
                features['numbers']: numbers,
                features['positions']: positions,
            }
            predicted.append(sess.run(y, feed_dict=feed_dict))

            # Progress report every 1000 rows (including the first).
            if count % 1000 == 0:
                print(str(count) + ' / ' + str(n_structures))

    return reference, predicted
finite_difference.py 文件源码
项目:house-of-enlightenment
作者: house-of-enlightenment
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def diffuse(self, pixels, delta_t):
    """Advance the stored field by one step and store it in ``self.pixels[2]``.

    An uninitialised ``X_MAX`` x ``Y_MAX`` float array is always appended to
    ``self.pixels`` first. Nothing else happens when ``delta_t`` is below 5.

    :param pixels: array indexed as ``[x, y, channel]`` with at least three
        channels; the channels are packed into one integer per pixel.
    :param delta_t: step size; scales the update.
    :return: always ``None``.
    """
    self.pixels.append(np.empty([self.X_MAX, self.Y_MAX], dtype=float))
    if delta_t < 5:
        return

    rate = self.diffusion_constant
    previous = self.pixels[1]
    neighbour_term, weight = self.hCalc()
    field = (neighbour_term - previous * weight) * delta_t * rate + previous

    # pylint: disable=no-member
    channels = np.array(pixels[:, :][:], dtype=np.int64)
    # Pack channel 0 / 1 / 2 into bits 16-23 / 8-15 / 0-7 of one integer.
    packed = (channels[:, :, 0] << 16) | (channels[:, :, 1] << 8) | (channels[:, :, 2])

    # Packed value 0xFF0000 adds 0xFFFF, 0xFF00 subtracts 0xFFFF, and
    # everything else contributes nothing.
    negative_or_zero = np.where(packed == 0xFF00, 0 - 0xFFFF, 0)
    forcing = np.where(packed == 0xFF0000, 0xFFFF,
                       negative_or_zero)[:self.X_MAX, :self.Y_MAX]

    # Clipping is idempotent, so a single clip yields the stored values.
    self.pixels[2] = np.clip(field + forcing, 0, 0xFFFF)
##
# These are the differences between node i,j and its closest neighbors;
# they are used in calculating spatial derivatives.
#
def get_label_batch(label_data, batch_size, batch_index):
    """Return batch number ``batch_index`` of ``label_data`` as int64.

    The batch starts at ``batch_index * batch_size`` (modulo the number of
    examples) and wraps around to the beginning of ``label_data`` when it
    runs past the end, so exactly ``batch_size`` entries are returned.

    :param label_data: array-like of labels; examples lie along axis 0.
    :param batch_size: number of examples per batch.
    :param batch_index: zero-based index of the requested batch.
    :return: array of ``batch_size`` entries along axis 0, dtype int64.
    :raises ZeroDivisionError: if ``label_data`` has no examples.
    """
    label_data = np.asarray(label_data)
    nrof_examples = np.size(label_data, 0)
    start = batch_index * batch_size % nrof_examples

    # Modular indexing handles the wrap-around case uniformly. The previous
    # slice-and-vstack approach returned 2 * (nrof_examples - start) items
    # and stacked 1-D labels into a 2-D array.
    indices = (start + np.arange(batch_size)) % nrof_examples
    return label_data[indices].astype(np.int64)
def is_monotonic(serie):
    """Tell whether ``serie`` is monotonically non-decreasing.

    Uses the public ``Series.is_monotonic_increasing`` property rather than
    the private ``pd.algos`` helpers, which current pandas releases no
    longer expose.

    :param serie: pandas Series of dtype float64 or int64.
    :return: ``True`` if the values never decrease, otherwise ``False``.
    :raises ValueError: if the dtype is neither float64 nor int64.
    """
    if serie.dtype != np.float64 and serie.dtype != np.int64:
        raise ValueError("unexpected column type: %s" % serie.dtype)
    return bool(serie.is_monotonic_increasing)
def get_offset(data_dtype, dtype_offset):
    """Resolve the offset to use for samples of type ``data_dtype``.

    :param data_dtype: dtype name or numpy scalar type of the data.
    :param dtype_offset: ``'auto'`` to pick the offset from ``data_dtype``,
        otherwise a value convertible with ``int()``.
    :return: the integer offset.

    When ``dtype_offset`` cannot be converted, rank 0 logs an error and the
    process exits with status 1.
    NOTE(review): the source lost its indentation; ``sys.exit(1)`` is assumed
    to run on every rank, not only on rank 0 - confirm.
    """
    if dtype_offset == 'auto':
        # (accepted spellings, offset) pairs, checked in order. Membership
        # is tested with ``in`` so that equality, not hashing, decides.
        auto_offsets = (
            (['uint16', numpy.uint16], 32768),
            (['int16', numpy.int16], 0),
            (['int32', numpy.int32], 0),
            (['int64', numpy.int64], 0),
            (['float32', numpy.float32], 0),
            (['int8', numpy.int8], 0),
            (['uint8', numpy.uint8], 127),
            (['float64', numpy.float64], 0),
        )
        for spellings, offset in auto_offsets:
            if data_dtype in spellings:
                dtype_offset = offset
                break
        if comm.rank == 0:
            print_and_log(['data type offset for %s is automatically set to %d' %(data_dtype, dtype_offset)], 'debug', logger)
        return dtype_offset

    try:
        dtype_offset = int(dtype_offset)
    except Exception:
        if comm.rank == 0:
            print_and_log(["Offset %s is not valid" %dtype_offset], 'error', logger)
        sys.exit(1)
    return dtype_offset
def _get_t_start_t_stop(self, idx, chunk_size, padding=(0,0)):
    """Return the ``(t_start, t_stop)`` bounds of chunk ``idx``.

    The bounds are ``idx * chunk_size`` and ``(idx + 1) * chunk_size``,
    shifted by ``padding[0]`` and ``padding[1]`` respectively, then limited
    to the range ``[0, self.duration]`` (lower limit applied to the start,
    upper limit applied to the stop).
    """
    step = numpy.int64(chunk_size)
    lead, trail = padding[0], padding[1]

    # min/max return their first argument on ties, so the computed value is
    # kept whenever it already lies within the limits.
    t_stop = min((idx + 1) * step + trail, self.duration)
    t_start = max(idx * step + lead, 0)
    return t_start, t_stop
def duration(self):
    """Return the total duration of this object.

    For a stream this is the sum of ``duration`` over ``self._sources``;
    otherwise it is the first entry of ``self._shape`` as ``numpy.int64``.
    """
    if not self.is_stream:
        return numpy.int64(self._shape[0])
    return sum(source.duration for source in self._sources)
def read_chunk(self, idx, chunk_size, padding=(0, 0), nodes=None):
    """Read chunk ``idx`` for the requested channels.

    :param idx: chunk index, forwarded to ``_get_t_start_t_stop``.
    :param chunk_size: chunk length, forwarded to ``_get_t_start_t_stop``.
    :param padding: ``(start, stop)`` adjustments, forwarded likewise.
    :param nodes: channel indices to read; all ``self.nb_channels`` channels
        when ``None``.
    :return: the result of ``self._scale_data_to_float32`` applied to an
        array with one row per time step and one column per node.
    """
    t_start, t_stop = self._get_t_start_t_stop(idx, chunk_size, padding)
    n_samples = t_stop - t_start

    if nodes is None:
        nodes = numpy.arange(self.nb_channels, dtype=numpy.int32)

    chunk = numpy.zeros((n_samples, len(nodes)), dtype=self.data_dtype)
    for column, node in enumerate(nodes):
        entity = self.data.get_entity(numpy.int64(node))
        # Only the first element of the get_data result is stored.
        chunk[:, column] = entity.get_data(t_start, numpy.int64(n_samples))[0]

    return self._scale_data_to_float32(chunk)
def gather_array(data, mpi_comm, root=0, shape=0, dtype='float32'):
    """Gather a 1-D or 2-D numpy array from all ranks with ``Gatherv``.

    :param data: local ``numpy.ndarray`` with fewer than three dimensions.
    :param mpi_comm: communicator providing ``gather`` and ``Gatherv``.
    :param root: rank passed as ``root`` to both calls.
    :param shape: for 2-D ``data`` only: ``0`` reshapes the result using the
        local number of rows, ``1`` using the local number of columns.
        Any other value makes the function return ``None`` for 2-D input.
    :param dtype: one of ``'float32'``, ``'float64'``, ``'int32'``,
        ``'int64'``.
    :return: the flat gathered buffer for 1-D ``data``, otherwise the
        reshaped buffer. The buffer is empty when ``gather`` returns nothing
        to this rank.
    :raises ValueError: if ``dtype`` is not one of the supported names.
    """
    # gather 1D or 2D numpy arrays
    assert isinstance(data, numpy.ndarray)
    assert len(data.shape) < 3

    # Strings are compared by value. The former ``dtype is '...'`` identity
    # checks only matched interned literals and left ``gdata`` undefined for
    # any other input.
    if dtype == 'float32':
        numpy_type, mpi_type = numpy.float32, MPI.FLOAT
    elif dtype == 'float64':
        numpy_type, mpi_type = numpy.float64, MPI.DOUBLE
    elif dtype == 'int32':
        numpy_type, mpi_type = numpy.int32, MPI.INT
    elif dtype == 'int64':
        numpy_type, mpi_type = numpy.int64, MPI.LONG
    else:
        raise ValueError("unsupported dtype for gather_array: %r" % (dtype,))

    # first we pass the data size
    size = data.size
    sizes = mpi_comm.gather(size, root=root) or []

    # now we pass the data
    # Each displacement is the running total of the preceding sizes.
    displacements = []
    total = 0
    for count in sizes:
        displacements.append(total)
        total += int(count)

    gdata = numpy.empty(total, dtype=numpy_type)
    mpi_comm.Gatherv([data.flatten(), size, mpi_type],
                     [gdata, (sizes, displacements), mpi_type], root=root)

    if len(data.shape) == 1:
        return gdata

    if shape == 0:
        num_lines = data.shape[0]
        if num_lines > 0:
            return gdata.reshape((num_lines, gdata.size//num_lines))
        # ``gdata`` is 1-D, so the column count has to come from ``data``
        # (``gdata.shape[1]`` always raised IndexError).
        return gdata.reshape((0, data.shape[1]))
    if shape == 1:
        num_columns = data.shape[1]
        if num_columns > 0:
            return gdata.reshape((gdata.size//num_columns, num_columns))
        return gdata.reshape((gdata.shape[0], 0))
02_frequency_discrimination_task.py 文件源码
项目:skiprnn-2017-telecombcn
作者: imatge-upc
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def generate_batch(batch_size, sampling_period, signal_duration, start_period, end_period,
                   start_target_period, end_target_period):
    """
    Build a stratified batch of sine-wave examples with two classes:
        class 0: period drawn from [start_target_period, end_target_period]
        class 1: period drawn by random_disjoint_interval from
                 [start_period, end_period] and the target interval
    The first int(batch_size/2) examples are class 0, the rest class 1.
    :param batch_size: number of samples per batch
    :param sampling_period: sampling period in milliseconds
    :param signal_duration: duration of the sine waves in milliseconds
    :param start_period: lower bound passed to random_disjoint_interval
    :param end_period: upper bound passed to random_disjoint_interval
    :param start_target_period: lower bound of the class 0 period range
    :param end_target_period: upper bound of the class 0 period range
    :return x: batch of examples, shape (batch_size, seq_length, 1)
    :return y: batch of labels, int64, shape (batch_size,)
    """
    seq_length = int(signal_duration / sampling_period)
    n_elems = 1
    half = int(batch_size/2)

    examples = np.empty((batch_size, seq_length, n_elems))
    labels = np.empty(batch_size, dtype=np.int64)
    time_axis = np.linspace(0, signal_duration - sampling_period, seq_length)

    # Class 0: periods inside the target interval.
    for row in range(half):
        period = random.uniform(start_target_period, end_target_period)
        phase_shift = random.uniform(0, period)
        examples[row, :, 0] = generate_example(time_axis, 1./period, phase_shift)
        labels[row] = 0

    # Class 1: periods chosen by random_disjoint_interval.
    for row in range(half, batch_size):
        period = random_disjoint_interval(start_period, end_period,
                                          start_target_period, end_target_period)
        phase_shift = random.uniform(0, period)
        examples[row, :, 0] = generate_example(time_axis, 1./period, phase_shift)
        labels[row] = 1

    return examples, labels