def test_infer_shape(self):
    """Run the shape-inference check of ``self.op`` on random sparse inputs.

    Every combination of ``sparse.sparse_formats`` and ``sparse.all_dtypes``
    is exercised with one (9, 10) and one (10, 24) random sparse input, and
    the result is handed to ``self._compile_and_check``.
    """
    for fmt in sparse.sparse_formats:
        for dtype in sparse.all_dtypes:
            # sparse_random_inputs yields two one-element sequences: the
            # symbolic variable and its matching concrete value.
            (x,), (x_value,) = sparse_random_inputs(
                fmt, shape=(9, 10), out_dtype=dtype, p=0.1)
            (y,), (y_value,) = sparse_random_inputs(
                fmt, shape=(10, 24), out_dtype=dtype, p=0.1)

            symbolic_inputs = [x, y]
            concrete_inputs = [x_value, y_value]
            self._compile_and_check(
                symbolic_inputs,
                [self.op(*symbolic_inputs)],
                concrete_inputs,
                self.op_class,
            )
# Example source code for the Python class sparse()
def make_node(self, x, y):
    """Build the Apply node for a sparse ``x`` combined with a dense ``y``.

    ``x`` is converted with ``sparse.as_sparse_variable`` and ``y`` with
    ``tensor.as_tensor_variable``.  The node's inputs are the sparse
    components of ``x`` (data, indices, indptr) followed by ``y``; its
    single output is a dense tensor whose dtype is the upcast of both input
    dtypes and whose broadcastable pattern is that of ``y``.

    Raises AssertionError if the op is in-place and the upcast dtype differs
    from ``y.dtype``, if ``x``'s format differs from ``self.format``, or if
    ``y`` is not 2-dimensional.
    """
    x = sparse.as_sparse_variable(x)
    y = tensor.as_tensor_variable(y)

    result_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
    if self.inplace:
        assert result_dtype == y.dtype

    indices = csm_indices(x)
    indptr = csm_indptr(x)
    data = csm_data(x)

    # We either use CSC or CSR depending on the format of input
    assert self.format == x.type.format
    # The magic number two here arises because L{scipy.sparse}
    # objects must be matrices (have dimension 2)
    assert y.type.ndim == 2

    result_type = tensor.TensorType(dtype=result_dtype,
                                    broadcastable=y.type.broadcastable)
    result = result_type()
    return gof.Apply(self, [data, indices, indptr, y], [result])
def local_structured_dot(node):
    """Replace a ``sparse._structured_dot`` node by a format-specific op.

    Returns a one-element list holding ``sd_csc(...)`` or ``sd_csr(...)``
    built from the components of the sparse input, or ``False`` when the
    node is not a structured dot or its sparse input is neither 'csc' nor
    'csr'.
    """
    if not (node.op == sparse._structured_dot):
        return False

    a, b = node.inputs
    sparse_format = a.type.format

    if sparse_format == 'csc':
        values, row_idx, col_ptr, shape = csm_properties(a)
        # The CSC variant additionally receives the first shape entry.
        n_rows = shape[0]
        return [sd_csc(values, row_idx, col_ptr, n_rows, b)]

    if sparse_format == 'csr':
        values, col_idx, row_ptr, _ = csm_properties(a)
        return [sd_csr(values, col_idx, row_ptr, b)]

    return False
# The registration below is commented out because
# a) it is only slightly faster than scipy these days, and sometimes a little
#    slower, and
# b) the resulting graphs make it very difficult for an op to do size checking
#    on the matrices involved. Dimension mismatches are hard to detect sensibly.
# register_specialize(local_structured_dot)
def grad(self, inputs, grads):
    """Return the gradients with respect to ``(x, ilist)``.

    The gradient for ``x`` is either a sparse structure built by
    ``construct_sparse_from_list`` (when ``self.sparse_grad`` is truthy) or
    ``advanced_inc_subtensor1`` applied to a zero tensor shaped like ``x``.
    Every remaining input gets a ``DisconnectedType`` gradient.

    Raises TypeError if a sparse gradient is requested for an ``x`` whose
    ``ndim`` is not 2.
    """
    global sparse_module_ref
    x, ilist = inputs
    (gz,) = grads
    assert len(inputs) == 2

    if not self.sparse_grad:
        x_grad = advanced_inc_subtensor1(x.zeros_like(), gz, ilist)
    else:
        if x.type.ndim != 2:
            raise TypeError(
                "AdvancedSubtensor1: you can't take the sparse grad"
                " from a tensor with ndim != 2. ndim is " +
                str(x.type.ndim))
        # theano.sparse is imported lazily, on first use, and cached in the
        # module-level global.
        if sparse_module_ref is None:
            import theano.sparse as sparse_module_ref
        x_grad = sparse_module_ref.construct_sparse_from_list(x, gz, ilist)

    disconnected = [DisconnectedType()()] * (len(inputs) - 1)
    return [x_grad] + disconnected
def detect_nan(i, node, fn):
    """Report the first output of ``fn`` that contains a NaN.

    Each entry of ``fn.outputs`` is a container whose element 0 is the
    value.  ``RandomState`` values are skipped; scipy sparse values are
    checked through their ``.data`` array.  On the first NaN found, the
    node is printed with ``theano.printing.debugprint`` together with all
    input and output values, and the scan stops.  ``i`` is not used.
    """
    for out_cell in fn.outputs:
        value = out_cell[0]
        if isinstance(value, np.random.RandomState):
            continue

        # Sparse matrices only store explicit entries, so inspect those.
        payload = value.data if sp.sparse.issparse(value) else value
        if np.isnan(payload).any():
            print('*** NaN detected ***')
            theano.printing.debugprint(node)
            print('Inputs : %s' % [cell[0] for cell in fn.inputs])
            print('Outputs: %s' % [cell[0] for cell in fn.outputs])
            break
def load_toy_data(n_samples=1000, dtype='float32'):
    """Create a random "two Melbournes" toy dataset split into train/dev/test.

    Targets ``Y`` are (lat, lon) pairs drawn from two unit-covariance
    bivariate Gaussians: ``n_samples`` points around Melbourne, FL and
    ``2 * n_samples`` points around Melbourne, AU.  Features ``X`` are a
    sparse CSR matrix with ``3 * n_samples`` rows of uniform noise in
    [-0.1, 0.1) offset by ``[1, 0]``.  Rows of ``X`` and ``Y`` are shuffled
    with the same permutation before splitting.

    Parameters
    ----------
    n_samples : int
        Base sample count.  The split sizes are ``2 * n_samples`` (train),
        ``n_samples // 2`` (dev) and the remainder (test).
    dtype : str
        dtype the features and targets are cast to.

    Returns
    -------
    tuple
        ``(X_train, Y_train, X_dev, Y_dev, X_test, Y_test, U_train, U_dev,
        U_test, None, None, userLocation, None)`` where the ``U_*`` entries
        are lists of row indices and ``userLocation`` maps each row index to
        the string ``"lat,lon"``.
    """
    print('creating Melbourne toy dataset as an inverse problem.')
    print('There are two (if not more) Melbournes, one in Australia and one in Florida, USA')
    mlb_fl_latlon_mean = np.array((28.0836, -80.6081))
    mlb_au_latlon_mean = np.array((-37.8136, 144.9631))
    cov = np.array([[1, 0], [0, 1]])
    n_total = 3 * n_samples

    # Bivariate Gaussians around each mean (variances 1, 1, correlation 0);
    # Melbourne, AU gets twice as many samples as Melbourne, FL.
    mlb_fl_samples = np.random.multivariate_normal(
        mean=mlb_fl_latlon_mean, cov=cov, size=n_samples).astype(dtype)
    mlb_au_samples = np.random.multivariate_normal(
        mean=mlb_au_latlon_mean, cov=cov, size=n_samples * 2).astype(dtype)
    # plt.scatter(mlb_fl_samples[:, 0], mlb_fl_samples[:, 1], c='blue', s=1)
    # plt.scatter(mlb_au_samples[:, 0], mlb_au_samples[:, 1], c='red', s=1)
    # plt.show()

    noise = np.random.uniform(-0.1, 0.1, size=(n_total, 2))
    X = sp.sparse.csr_matrix(noise + np.array([1, 0])).astype(dtype)
    Y = np.vstack((mlb_fl_samples, mlb_au_samples))

    # Shuffle X and Y with the same permutation.
    indices = np.arange(n_total)
    np.random.shuffle(indices)
    X = X[indices]
    Y = Y[indices]

    n_train_samples = 2 * n_samples
    # Floor division keeps the split size an int; true division yields a
    # float on Python 3, which cannot be used in slices or range().
    n_dev_samples = n_samples // 2
    n_test_samples = n_total - n_train_samples - n_dev_samples
    dev_end = n_train_samples + n_dev_samples
    test_end = dev_end + n_test_samples

    X_train = X[0:n_train_samples, :]
    X_dev = X[n_train_samples:dev_end, :]
    X_test = X[dev_end:test_end, :]
    Y_train = Y[0:n_train_samples, :]
    Y_dev = Y[n_train_samples:dev_end, :]
    Y_test = Y[dev_end:test_end, :]
    U_train = list(range(n_train_samples))
    U_dev = list(range(n_train_samples, dev_end))
    U_test = list(range(dev_end, test_end))

    userLocation = {}
    for i, (lat, lon) in enumerate(Y):
        userLocation[i] = str(lat) + ',' + str(lon)

    data = (X_train, Y_train, X_dev, Y_dev, X_test, Y_test,
            U_train, U_dev, U_test, None, None, userLocation, None)
    return data
def __init__(self,
             n_epochs=10,
             batch_size=1000,
             regul_coef=1e-6,
             input_size=None,
             output_size=None,
             hid_size=100,
             drop_out=False,
             dropout_coef=0.5,
             early_stopping_max_down=10,
             dtype='float32',
             autoencoder=100,
             input_sparse=False,
             reload=False,
             ncomp=100,
             sqerror=False,
             dataset_name=''):
    """Store the hyper-parameters and build the model.

    Every argument is stored unchanged on the instance under its own name,
    except ``input_sparse`` (stored as ``self.sparse``) and ``ncomp``
    (stored as ``self.n_bigaus_comp``).  After logging the configuration,
    ``self.build_squarederror_regression()`` is called when ``sqerror`` is
    truthy, otherwise ``self.build()``.

    NOTE(review): the meaning of the individual hyper-parameters is defined
    by the build/training code, which is not visible here.
    """
    self.n_epochs = n_epochs
    self.batch_size = batch_size
    self.regul_coef = regul_coef
    self.hid_size = hid_size
    self.drop_out = drop_out
    self.dropout_coef = dropout_coef
    self.early_stopping_max_down = early_stopping_max_down
    self.dtype = dtype
    self.input_size = input_size
    self.output_size = output_size
    self.autoencoder = autoencoder
    self.sparse = input_sparse
    self.reload = reload
    self.n_bigaus_comp = ncomp
    self.sqerror = sqerror
    self.dataset_name = dataset_name

    # %s instead of %d: output_size defaults to None, and '%d' % None raises
    # TypeError before the model is even built.  Arguments are passed lazily
    # so the message is only formatted when the record is emitted.
    logging.info(
        'building nn model with %s hidden size, %s bivariate gaussian components and %s output size',
        self.hid_size, self.n_bigaus_comp, self.output_size)

    if self.sqerror:
        self.build_squarederror_regression()
    else:
        self.build()
def _assert_sparse_module():
    """Raise ImportError when ``th_sparse_module`` is falsy; otherwise do nothing."""
    if th_sparse_module:
        return
    raise ImportError("Failed to import theano.sparse\n"
                      "You probably need to pip install nose-parameterized")
def concatenate(tensors, axis=-1):
    """Concatenate ``tensors`` along ``axis``.

    If every element is sparse (per ``is_sparse``), the axis is normalised
    modulo the rank of the first tensor and the tensors are stacked into a
    CSR result: ``vstack`` for axis 0, ``hstack`` for axis 1.  Otherwise
    every element is converted with ``to_dense`` and ``T.concatenate`` is
    used with the axis as given.

    Raises
    ------
    ValueError
        If all tensors are sparse and the normalised axis is neither 0 nor 1.
    """
    if not py_all([is_sparse(x) for x in tensors]):
        return T.concatenate([to_dense(x) for x in tensors], axis=axis)

    axis = axis % ndim(tensors[0])
    if axis == 0:
        return th_sparse_module.basic.vstack(tensors, format='csr')
    if axis == 1:
        return th_sparse_module.basic.hstack(tensors, format='csr')
    # The axis is an int: it must be converted before being appended to the
    # message, otherwise the concatenation itself fails with TypeError.
    raise ValueError('Invalid concat axis for sparse matrix: ' + str(axis))
def _assert_sparse_module():
    """Raise ImportError when ``th_sparse_module`` is falsy; otherwise do nothing."""
    if th_sparse_module:
        return
    raise ImportError("Failed to import theano.sparse\n"
                      "You probably need to pip install nose-parameterized")
def concatenate(tensors, axis=-1):
    """Concatenate ``tensors`` along ``axis``.

    When every element is sparse (per ``is_sparse``), the axis is taken
    modulo the rank of the first tensor and the result is a CSR stack:
    ``vstack`` for axis 0, ``hstack`` for axis 1, ValueError otherwise.
    In all other cases each element goes through ``to_dense`` and
    ``T.concatenate`` is applied with the axis as given.
    """
    if not py_all([is_sparse(x) for x in tensors]):
        dense_parts = [to_dense(x) for x in tensors]
        return T.concatenate(dense_parts, axis=axis)

    axis = axis % ndim(tensors[0])
    if axis == 0:
        return th_sparse_module.basic.vstack(tensors, format='csr')
    if axis == 1:
        return th_sparse_module.basic.hstack(tensors, format='csr')
    raise ValueError('Invalid concat axis for sparse matrix:', axis)
def _assert_sparse_module():
    """Raise ImportError when ``th_sparse_module`` is falsy; otherwise do nothing."""
    if th_sparse_module:
        return
    raise ImportError("Failed to import theano.sparse\n"
                      "You probably need to pip install nose-parameterized")
def concatenate(tensors, axis=-1):
    """Concatenate ``tensors`` along ``axis``.

    When every element is sparse (per ``is_sparse``), the axis is taken
    modulo the rank of the first tensor and the result is a CSR stack:
    ``vstack`` for axis 0, ``hstack`` for axis 1, ValueError otherwise.
    In all other cases each element goes through ``to_dense`` and
    ``T.concatenate`` is applied with the axis as given.
    """
    if not py_all([is_sparse(x) for x in tensors]):
        dense_parts = [to_dense(x) for x in tensors]
        return T.concatenate(dense_parts, axis=axis)

    axis = axis % ndim(tensors[0])
    if axis == 0:
        return th_sparse_module.basic.vstack(tensors, format='csr')
    if axis == 1:
        return th_sparse_module.basic.hstack(tensors, format='csr')
    raise ValueError('Invalid concat axis for sparse matrix:', axis)
def _assert_sparse_module():
    """Raise ImportError when ``th_sparse_module`` is falsy; otherwise do nothing."""
    if th_sparse_module:
        return
    raise ImportError("Failed to import theano.sparse\n"
                      "You probably need to pip install nose-parameterized")
def concatenate(tensors, axis=-1):
    """Concatenate ``tensors`` along ``axis``.

    When every element is sparse (per ``is_sparse``), the axis is taken
    modulo the rank of the first tensor and the result is a CSR stack:
    ``vstack`` for axis 0, ``hstack`` for axis 1, ValueError otherwise.
    In all other cases each element goes through ``to_dense`` and
    ``T.concatenate`` is applied with the axis as given.
    """
    if not py_all([is_sparse(x) for x in tensors]):
        dense_parts = [to_dense(x) for x in tensors]
        return T.concatenate(dense_parts, axis=axis)

    axis = axis % ndim(tensors[0])
    if axis == 0:
        return th_sparse_module.basic.vstack(tensors, format='csr')
    if axis == 1:
        return th_sparse_module.basic.hstack(tensors, format='csr')
    raise ValueError('Invalid concat axis for sparse matrix:', axis)
def _setup_vars(self, sparse_input):
    '''Create the symbolic Theano variables used by this network.

    Sets ``self.x``, ``self.targets`` and ``self.weights``.

    Parameters
    ----------
    sparse_input : bool
        If true, ``self.x`` is a sparse CSR matrix variable; otherwise it
        is a dense matrix variable.

    Returns
    -------
    vars : list of theano variables
        ``[x, targets]``, followed by ``weights`` when ``self.weighted``
        is true.
    '''
    # Network input: dense by default, swapped for a CSR variable on request.
    self.x = TT.matrix('x')
    if sparse_input:
        self.x = SS.csr_matrix('x')

    # Target outputs for the input x.
    self.targets = TT.matrix('targets')

    # Weights let different target values count differently during
    # optimization.
    self.weights = TT.matrix('weights')

    network_inputs = [self.x, self.targets]
    if self.weighted:
        network_inputs.append(self.weights)
    return network_inputs
def _setup_vars(self, sparse_input):
    '''Create the symbolic Theano variables used by this classifier.

    Sets ``self.x``, ``self.labels`` and ``self.weights``.

    Parameters
    ----------
    sparse_input : bool
        If true, ``self.x`` is a sparse CSR matrix variable; otherwise it
        is a dense matrix variable.

    Returns
    -------
    vars : list of theano variables
        ``[x, labels]``, followed by ``weights`` when ``self.weighted``
        is true.
    '''
    # Network input: dense by default, swapped for a CSR variable on request.
    self.x = TT.matrix('x')
    if sparse_input:
        self.x = SS.csr_matrix('x')

    # For a classifier, the correct labels for a given input.
    self.labels = TT.ivector('labels')

    # The weights are just a vector here.
    self.weights = TT.vector('weights')

    network_inputs = [self.x, self.labels]
    if self.weighted:
        network_inputs.append(self.weights)
    return network_inputs
def _assert_sparse_module():
    """Raise ImportError when ``th_sparse_module`` is falsy; otherwise do nothing."""
    if th_sparse_module:
        return
    raise ImportError("Failed to import theano.sparse\n"
                      "You probably need to pip install nose-parameterized")
def concatenate(tensors, axis=-1):
    """Concatenate ``tensors`` along ``axis``.

    When every element is sparse (per ``is_sparse``), the axis is taken
    modulo the rank of the first tensor and the result is a CSR stack:
    ``vstack`` for axis 0, ``hstack`` for axis 1, ValueError otherwise.
    In all other cases each element goes through ``to_dense`` and
    ``T.concatenate`` is applied with the axis as given.
    """
    if not py_all([is_sparse(x) for x in tensors]):
        dense_parts = [to_dense(x) for x in tensors]
        return T.concatenate(dense_parts, axis=axis)

    axis = axis % ndim(tensors[0])
    if axis == 0:
        return th_sparse_module.basic.vstack(tensors, format='csr')
    if axis == 1:
        return th_sparse_module.basic.hstack(tensors, format='csr')
    raise ValueError('Invalid concat axis for sparse matrix:', axis)
def test_local_dense_from_sparse_sparse_from_dense():
    """Check that dense -> sparse -> dense compiles down to a single node.

    With the "local_dense_from_sparse_sparse_from_dense" optimization
    included in the default mode, the compiled graph for both the CSR and
    the CSC round trip must contain exactly one apply node; the function is
    then called once on a 2x2 input.
    """
    default_mode = theano.compile.mode.get_default_mode()
    mode = default_mode.including("local_dense_from_sparse_sparse_from_dense")

    dense_in = theano.tensor.matrix()
    to_sparse_ops = [theano.sparse.csr_from_dense, theano.sparse.csc_from_dense]
    for to_sparse in to_sparse_ops:
        as_sparse = to_sparse(dense_in)
        round_trip = theano.sparse.dense_from_sparse(as_sparse)
        compiled = theano.function([dense_in], round_trip, mode=mode)
        # We should just have a deep copy.
        assert len(compiled.maker.fgraph.apply_nodes) == 1
        compiled([[1, 2], [3, 4]])