def get_output_for(self, input, deterministic=False, **kwargs):
    if not isinstance(input, (S.SparseVariable, S.SparseConstant,
                              S.sharedvar.SparseTensorSharedVariable)):
        raise ValueError("Input for this layer must be sparse")

    if deterministic or self.p == 0:
        return input
    else:
        # Using a Theano constant to prevent upcasting
        one = T.constant(1, name='one')
        retain_prob = one - self.p
        if self.rescale:
            input = S.mul(input, one / retain_prob)

        input_shape = self.input_shape
        if any(s is None for s in input_shape):
            input_shape = input.shape

        return input * self._srng.binomial(input_shape, p=retain_prob,
                                           dtype=input.dtype)
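For intuition, the same inverted-dropout arithmetic on a dense numpy array; a rough sketch with a hypothetical drop probability, whereas the layer above does this with sparse Theano ops and a symbolic RNG.

import numpy as np

p = 0.5                                   # drop probability (hypothetical)
retain_prob = 1.0 - p
x = np.ones((4, 3), dtype='float32')
rng = np.random.RandomState(0)
mask = rng.binomial(n=1, p=retain_prob, size=x.shape).astype(x.dtype)
out = (x / retain_prob) * mask            # rescale, then drop
print(out.mean())                         # close to x.mean() in expectation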
def l2_decay(self, gamma, layers=None):
    '''L2 decay cost.

    Args:
        gamma (float): l2 decay rate.
        layers (Optional[list]): layer numbers to do l2 decay on.

    Returns:
        T.tensor: L2 cost.

    '''
    if layers is None:
        layers = range(self.n_layers)

    cost = T.constant(0.).astype(floatX)
    for l in layers:
        W = self.__dict__['W%d' % l]
        cost += gamma * (W ** 2).sum()

    return cost
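A minimal numpy check of what the L2 term above accumulates: gamma times the sum of squared weights, here for a hypothetical 2x2 weight matrix.

import numpy as np

gamma = 0.01
W = np.array([[1.0, -2.0],
              [0.5,  3.0]])
l2_cost = gamma * (W ** 2).sum()   # 0.01 * (1 + 4 + 0.25 + 9) = 0.1425
print(l2_cost)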
def l2_decay(self, rate):
    rec_l2_cost = T.constant(0.).astype(floatX)
    gen_l2_cost = T.constant(0.).astype(floatX)

    for l in xrange(self.n_layers):
        rec_l2_cost += self.posteriors[l].l2_decay(rate)
        gen_l2_cost += self.conditionals[l].l2_decay(rate)

    rval = OrderedDict(
        rec_l2_cost=rec_l2_cost,
        gen_l2_cost=gen_l2_cost,
        cost=rec_l2_cost + gen_l2_cost
    )

    return rval
# --------------------------------------------------------------------------
def forward(self, inputtensor):
    inputimage = inputtensor[0]
    #print('conv2d.forward.type: {}'.format(inputimage.ndim))
    if self.dc == 0.0:
        pass
    else:
        if 0 < self.dc <= 1:
            # DropConnect-style masking: randomly zero weights with
            # probability self.dc before the convolution.
            _srng = RandomStreams(np.random.randint(1, 2147462579))
            one = T.constant(1)
            retain_prob = one - self.dc
            mask_shape = self.w.shape
            mask = _srng.binomial(mask_shape, p=retain_prob,
                                  dtype=self.w.dtype)
            self.w = self.w * mask
        else:
            raise IndexError("drop rate dc must lie in (0, 1]")

    l3conv = T.nnet.conv2d(inputimage,
                           self.w,
                           border_mode=self.border,
                           subsample=self.subsample)

    if self.need_bias:
        return ((l3conv + self.b.dimshuffle('x', 0, 'x', 'x')), )
    else:
        return (l3conv, )
def forward(self, inputtensor):
    inputimage = inputtensor[0]
    if self.dc == 0.0:
        pass
    else:
        if 0 < self.dc <= 1:
            # DropConnect-style masking of the weight matrix.
            _srng = RandomStreams(np.random.randint(1, 2147462579))
            one = T.constant(1)
            retain_prob = one - self.dc
            mask_shape = self.w.shape
            mask = _srng.binomial(mask_shape, p=retain_prob,
                                  dtype=self.w.dtype)
            self.w = self.w * mask
        else:
            raise IndexError("drop rate dc must lie in (0, 1]")

    if self.need_bias:
        return ((T.dot(inputimage, self.w) + self.b), )
    else:
        return (T.dot(inputimage, self.w), )
def RmsProp(cost, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    updates = OrderedDict()
    grads = T.grad(cost, params)

    # Using a Theano constant to prevent upcasting of float32
    one = T.constant(1)
    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        updates[param] = param - (learning_rate * grad /
                                  T.sqrt(accu_new + epsilon))

    return updates
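For reference, one RMSProp step on plain numpy arrays follows the same two formulas used above (accumulator update, then scaled gradient step); the parameter and gradient values here are hypothetical.

import numpy as np

learning_rate, rho, epsilon = 1.0, 0.9, 1e-6
param = np.array([0.5, -1.0])
grad = np.array([0.2, 0.4])
accu = np.zeros_like(param)

accu_new = rho * accu + (1 - rho) * grad ** 2                        # running average of g^2
param_new = param - learning_rate * grad / np.sqrt(accu_new + epsilon)
print(accu_new, param_new)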
def EGD(cost, params, learning_rate=0.33, constraint=1.0):
    # Exponentiated gradient descent (EG+/-): multiplicative updates with the
    # total weight mass constrained to U.
    updates = OrderedDict()
    grads = T.grad(cost, params)
    U = T.constant(constraint)

    # multiplicative factors for the first half of the params (the _pos copies)
    rw_pos = T.exp(-learning_rate * U * grads[0])
    rb_pos = T.exp(-learning_rate * U * grads[1])
    # and their reciprocals for the second half (the _neg copies)
    rw_neg = 1 / rw_pos
    rb_neg = 1 / rb_pos
    rs = [rw_pos, rb_pos, rw_neg, rb_neg]

    partition = (T.sum(params[0] * rs[0]) + T.sum(params[1] * rs[1]) +
                 T.sum(params[2] * rs[2]) + T.sum(params[3] * rs[3]))

    for param, r in zip(params, rs):
        updates[param] = U * param * r / partition

    return updates
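A rough numpy sketch of one such EG+/- step with hypothetical toy parameter blocks; because every block is rescaled by the shared partition function, the updated components sum to the constraint U.

import numpy as np

learning_rate, U = 0.33, 1.0
# hypothetical positive/negative parameter blocks and gradients of the first two
w_pos, b_pos = np.array([0.3, 0.2]), np.array([0.1])
w_neg, b_neg = np.array([0.2, 0.1]), np.array([0.1])
g_w, g_b = np.array([0.5, -0.5]), np.array([0.25])

rw_pos = np.exp(-learning_rate * U * g_w)
rb_pos = np.exp(-learning_rate * U * g_b)
rs = [rw_pos, rb_pos, 1 / rw_pos, 1 / rb_pos]
params = [w_pos, b_pos, w_neg, b_neg]

partition = sum((p * r).sum() for p, r in zip(params, rs))
updated = [U * p * r / partition for p, r in zip(params, rs)]
print(sum(u.sum() for u in updated))   # equals U up to rounding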
def get_output_for(self, input, deterministic=False, **kwargs):
    """
    Parameters
    ----------
    input : tensor
        output from the previous layer
    deterministic : bool
        If true, dropout and rescaling are disabled; see notes
    """
    if deterministic or self.p == 0:
        return input
    else:
        # Using a Theano constant to prevent upcasting
        one = T.constant(1)
        retain_prob = one - self.p
        if self.rescale:
            input /= retain_prob

        # Sample the mask over the first two axes only and broadcast it
        # over the remaining dimensions.
        mask = _srng.binomial(input.shape[:2], p=retain_prob,
                              dtype=theano.config.floatX)
        axes = [0, 1] + (['x'] * (input.ndim - 2))
        mask = mask.dimshuffle(*axes)

        return input * mask
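Because the mask above is sampled only over the first two axes and then broadcast, entire feature vectors are dropped per (batch, position) pair. A small numpy illustration of that broadcasting, using a hypothetical fixed mask:

import numpy as np

x = np.ones((2, 3, 4), dtype='float32')         # (batch, length, features)
mask = np.array([[1, 0, 1],
                 [0, 1, 1]], dtype='float32')   # sampled over (batch, length)
out = x * mask[:, :, None]                      # broadcast over features
print(out[0, 1])  # all-zero feature vector: that position is dropped whole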
def temporal_padding_mask(mask, kernel_size, padding_size):
    """Compute the output mask of a 1-D convolution from its input mask.

    Given a binary mask of shape (batch, length), each row of the returned
    mask has `sum(mask) - kernel_size + 2 * padding_size + 1` leading ones,
    i.e. the number of valid output positions for that sample.
    Code from https://github.com/fchollet/keras/blob/master/keras/backend/theano_backend.py
    """
    mask_shape = mask.shape
    mask_sum = T.sum(mask, axis=1)
    output_length = mask_sum - kernel_size + 2 * padding_size + 1
    max_output_length = mask_shape[1] - kernel_size + 2 * padding_size + 1
    real_output_length = T.maximum(output_length, 1)

    range_base = T.arange(max_output_length)
    range_matrix = T.outer(T.ones((mask_shape[0],)), range_base)
    mask = (range_matrix < real_output_length[:, None]) * T.constant(1.0)
    return mask
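A quick numpy check of the same arithmetic for hypothetical inputs (kernel size 3, padding 1): a sample with 4 valid timesteps keeps 4 output positions, one with 2 valid timesteps keeps 2.

import numpy as np

kernel_size, padding_size = 3, 1
mask = np.array([[1, 1, 1, 1, 0],
                 [1, 1, 0, 0, 0]])
out_len = mask.sum(axis=1) - kernel_size + 2 * padding_size + 1   # [4, 2]
max_out = mask.shape[1] - kernel_size + 2 * padding_size + 1      # 5
out_mask = (np.arange(max_out)[None, :]
            < np.maximum(out_len, 1)[:, None]).astype(float)
print(out_mask)
# [[1. 1. 1. 1. 0.]
#  [1. 1. 0. 0. 0.]]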
def print_graph_linker(print_prog=True):
    if 1:
        imap = {None: '-'}

        def blah(i, node, thunk):
            imap[node] = str(i)
            if print_prog:  # and node.op.__class__ is T.DimShuffle:
                if False and node.op == T.DimShuffle((), ['x', 'x'],
                                                     inplace=True):
                    print(node.op == T.DimShuffle((), ['x', 'x'],
                                                  inplace=True), end=' ')
                    print(node.inputs[0], type(node.inputs[0]), end=' ')
                    print(node.inputs[0].equals(T.constant(2)), end=' ')
                outputs = node.outputs
                inputs = theano.gof.graph.inputs(outputs)
                print('node ', i, node, end=' ')
                print(':'.join([imap[inp.owner] for inp in node.inputs]))
                # print theano.sandbox.pprint.pp.process_graph(inputs, outputs)

        return theano.sandbox.wraplinker.WrapLinkerMany(
            [theano.gof.OpWiseCLinker()],
            [theano.sandbox.wraplinker.run_all,
             blah,
             # theano.sandbox.wraplinker.numpy_notall_isfinite
             ])
    else:
        return theano.gof.OpWiseCLinker()
def test_csm_unsorted(self):
    """
    Test support for gradients of unsorted inputs.
    """
    sp_types = {'csc': sp.csc_matrix,
                'csr': sp.csr_matrix}

    for format in ['csr', 'csc', ]:
        for dtype in ['float32', 'float64']:
            x = tensor.tensor(dtype=dtype, broadcastable=(False,))
            y = tensor.ivector()
            z = tensor.ivector()
            s = tensor.ivector()
            # Sparse advanced indexing produces unsorted sparse matrices
            a = sparse_random_inputs(format, (4, 3), out_dtype=dtype,
                                     unsorted_indices=True)[1][0]
            # Make sure it's unsorted
            assert not a.has_sorted_indices

            def my_op(x):
                y = tensor.constant(a.indices)
                z = tensor.constant(a.indptr)
                s = tensor.constant(a.shape)
                return tensor.sum(
                    dense_from_sparse(CSM(format)(x, y, z, s) * a))

            verify_grad_sparse(my_op, [a.data])
def test_constant_folding():
    """Test that constant folding gets registered at fast_compile.

    An earlier error removed that registration during registration.
    """
    x = tensor.dvector()
    mode = theano.compile.get_mode("FAST_COMPILE").excluding("fusion")
    f = theano.function([x], [x * 2, x + x], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2

    # Test that we do not crash when constant folding an elemwise scalar,
    # as they should not generate c code.
    x = tensor.constant(3)
    assert x.ndim == 0
    mode = theano.compile.get_mode("FAST_COMPILE").excluding("fusion")
    f = theano.function([], [x * 2, x + x], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert all([isinstance(n.op, DeepCopyOp) for n in topo])
def test_local_add_specialize():
    # test of non-zero dimension
    a = tensor.vector()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # test of 0-d
    a = tensor.scalar()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # Test when the 0 input is forcing upcasting
    a = tensor.constant(0, dtype='int64')
    b = tensor.constant(1, dtype='int32')
    s = a + b
    transformed = local_add_specialize.transform(s.owner)
    assert transformed
    assert transformed[0].type == s.type
def test_lt(self):
    for dtype in self.dtypes:
        l = numpy.asarray([0., -1., 1.], dtype=dtype)
        r = numpy.asarray([0., 1., -1.], dtype=dtype)
        for x, y, err in [
            (self.shared(l.astype(dtype)),
             self.shared(r.astype(dtype)), False),
            (l, self.shared(r.astype(dtype)), True),
            (tensor.constant(l), self.shared(r.astype(dtype)), False),
            (self.shared(l.astype(dtype)), r, False),
            (self.shared(l.astype(dtype)), tensor.constant(r), False),
        ]:
            try:
                fn = self.inplace_func([], x < y)
                v = fn()
                self.assertTrue(numpy.all(v == (l < r)), (v, (l < r)))
            except TypeError:
                assert err
def test_le(self):
    for dtype in self.dtypes:
        l = numpy.asarray([0., -1., 1.], dtype=dtype)
        r = numpy.asarray([0., 1., -1.], dtype=dtype)
        for x, y, err in [
            (self.shared(l.astype(dtype)),
             self.shared(r.astype(dtype)), False),
            (l, self.shared(r.astype(dtype)), True),
            (tensor.constant(l), self.shared(r.astype(dtype)), False),
            (self.shared(l.astype(dtype)), r, False),
            (self.shared(l.astype(dtype)), tensor.constant(r), False),
        ]:
            try:
                fn = self.inplace_func([], x <= y)
                v = fn()
                self.assertTrue(numpy.all(v == (l <= r)), (v, (l <= r)))
            except TypeError:
                assert err
def test_eq(self):
    for dtype in self.dtypes:
        l = numpy.asarray([0., -1., 1.], dtype=dtype)
        r = numpy.asarray([0., 1., -1.], dtype=dtype)
        for x, y, err in [
            (self.shared(l.astype(dtype)),
             self.shared(r.astype(dtype)), False),
            (l, self.shared(r.astype(dtype)), True),
            (tensor.constant(l), self.shared(r.astype(dtype)), False),
            (self.shared(l.astype(dtype)), r, False),
            (self.shared(l.astype(dtype)), tensor.constant(r), False),
        ]:
            try:
                fn = self.inplace_func([], eq(x, y))
                v = fn()
                self.assertTrue(numpy.all(v == (l == r)), (v, (l == r)))
            except TypeError:
                assert err
def test_neq(self):
    for dtype in self.dtypes:
        l = numpy.asarray([0., -1., 1.], dtype=dtype)
        r = numpy.asarray([0., 1., -1.], dtype=dtype)
        for x, y, err in [
            (self.shared(l.astype(dtype)),
             self.shared(r.astype(dtype)), False),
            (l, self.shared(r.astype(dtype)), True),
            (tensor.constant(l), self.shared(r.astype(dtype)), False),
            (self.shared(l.astype(dtype)), r, False),
            (self.shared(l.astype(dtype)), tensor.constant(r), False),
        ]:
            try:
                fn = self.inplace_func([], neq(x, y))
                v = fn()
                self.assertTrue(numpy.all(v == (l != r)), (v, (l != r)))
            except TypeError:
                assert err
def test1(self):
    s = scal.constant(56)
    t = as_tensor_variable(s)
    self.assertTrue(t.owner.op is tensor_from_scalar)
    self.assertTrue(t.type.broadcastable == (), t.type.broadcastable)
    self.assertTrue(t.type.ndim == 0, t.type.ndim)
    self.assertTrue(t.type.dtype == s.type.dtype)

    v = eval_outputs([t])

    self.assertTrue(v == 56, v)
    self.assertTrue(isinstance(v, numpy.ndarray))
    self.assertTrue(v.shape == (), v.shape)

    g = grad(t, s)
    self.assertTrue(eval_outputs([g]) == 0.)
def test2(self):
    s = scal.constant(56.)
    t = as_tensor_variable(s)
    self.assertTrue(t.owner.op is tensor_from_scalar)
    self.assertTrue(t.type.broadcastable == (), t.type.broadcastable)
    self.assertTrue(t.type.ndim == 0, t.type.ndim)
    self.assertTrue(t.type.dtype == s.type.dtype)

    v = eval_outputs([t])

    self.assertTrue(v == 56., v)
    self.assertTrue(isinstance(v, numpy.ndarray))
    self.assertTrue(v.shape == (), v.shape)

    g = grad(t, s)
    self.assertTrue(eval_outputs([g]) == 1.)
def test0(self):
    tt = constant(56)  # scal.constant(56)
    ss = scalar_from_tensor(tt)
    self.assertTrue(ss.owner.op is scalar_from_tensor)
    self.assertTrue(ss.type.dtype == tt.type.dtype)

    v = eval_outputs([ss])

    self.assertTrue(v == 56, v)
    if config.cast_policy == 'custom':
        self.assertTrue(isinstance(v, numpy.int16))
    elif config.cast_policy in ('numpy', 'numpy+floatX'):
        self.assertTrue(isinstance(
            v, getattr(numpy, str(numpy.asarray(56).dtype))))
    else:
        raise NotImplementedError(config.cast_policy)
    self.assertTrue(v.shape == (), v.shape)

    tt = lscalar()
    ss = scalar_from_tensor(tt)
    g = ss.owner.op.grad([tt], [ss])
    fff = function([tt], ss)
    v = fff(numpy.asarray(5))
    self.assertTrue(v == 5, v)
    self.assertTrue(isinstance(v, numpy.int64))
    self.assertTrue(v.shape == (), v.shape)
def _test_autocast_numpy():
    """Called from `test_autocast`."""
    assert config.cast_policy == 'numpy'

    # Go through some typical scalar values.
    def ok(z):
        assert tensor.constant(z).dtype == numpy.asarray(z).dtype

    for x in ([2 ** i for i in xrange(63)] +
              [0, L(0), L(1), L(2 ** 63 - 1)] +
              [0., 1., 1.1, 1.5]):
        n_x = numpy.asarray(x)
        # Make sure the data type is the same as the one found by numpy.
        ok(x)
        ok(-x)
        ok(x - 1)
        ok(-x + 1)
        ok(n_x)
def infer_shape(self, node, i_shapes):
    r, shp = node.inputs[0:2]

    # if shp is a constant array of len 0, then it means 'automatic shape'
    unknown_shape = len(getattr(shp, 'data', [0, 1, 2])) == 0

    # if ndim_added == 0 and shape != () then shape
    if self.ndim_added == 0 and not unknown_shape:
        sample_shp = shp
    else:
        # if shape == () then it will depend on args
        # if ndim_added != 0 and shape != () then it will depend on args
        # Use the default infer_shape implementation.
        raise tensor.ShapeError()

    return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
def make_node(self, x, index):
    assert isinstance(x.type, TypedListType)
    if not isinstance(index, Variable):
        if isinstance(index, slice):
            index = Constant(SliceType(), index)
            return Apply(self, [x, index], [x.type()])
        else:
            index = T.constant(index, ndim=0, dtype='int64')
            return Apply(self, [x, index], [x.ttype()])
    if isinstance(index.type, SliceType):
        return Apply(self, [x, index], [x.type()])
    elif isinstance(index, T.TensorVariable) and index.ndim == 0:
        assert index.dtype == 'int64'
        return Apply(self, [x, index], [x.ttype()])
    else:
        raise TypeError('Expected scalar or slice as index.')
def test_constant(self):
    orig_compute_test_value = theano.config.compute_test_value
    try:
        theano.config.compute_test_value = 'raise'

        x = T.constant(numpy.random.rand(2, 3), dtype=config.floatX)
        y = theano.shared(numpy.random.rand(3, 6).astype(config.floatX),
                          'y')

        # should work
        z = T.dot(x, y)
        assert hasattr(z.tag, 'test_value')
        f = theano.function([], z)
        assert _allclose(f(), z.tag.test_value)

        # this test should fail
        x = T.constant(numpy.random.rand(2, 4), dtype=config.floatX)
        self.assertRaises(ValueError, T.dot, x, y)
    finally:
        theano.config.compute_test_value = orig_compute_test_value
def test_gpualloc():
    '''
    This test tries to catch the scenario when, due to infer_shape,
    the input of the alloc changes from a tensor scalar to the constant
    1. In this case the originally constructed broadcastable pattern will
    have a False for that dimension, but the new broadcastable pattern
    that will be inserted by gpualloc will have a True since it knows the
    dimension is 1 and therefore broadcastable.
    '''
    x = theano.shared(numpy.ones(3, dtype='float32'), 'x')
    m = (x).dimshuffle(['x', 0])
    v = tensor.alloc(1., *m.shape)
    f = theano.function([],
                        v + x,
                        mode=mode_with_gpu.excluding("local_elemwise_alloc"))
    l = f.maker.fgraph.toposort()
    assert numpy.any([isinstance(y.op, cuda.GpuAlloc) for y in l])
def rmsprop_updates(grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    """RMSProp updates; gradient steps are max-norm constrained when possible."""
    updates = OrderedDict()

    # Using a Theano constant to prevent upcasting of float32
    one = T.constant(1)
    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        try:
            # norm_constraint raises for parameters it does not support
            # (e.g. 1-D biases); fall back to the plain update in that case.
            updates[param] = lasagne.updates.norm_constraint(
                param - (learning_rate * grad / T.sqrt(accu_new + epsilon)),
                MAX_NORM)
        except:
            updates[param] = param - (learning_rate * grad /
                                      T.sqrt(accu_new + epsilon))

    return updates
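A hypothetical usage sketch for the helper above: build a scalar cost from a shared parameter, then compile a training step with the returned updates. It assumes the module-level names the snippet relies on (np, theano, T, OrderedDict, and optionally lasagne/MAX_NORM, whose absence falls through the bare except) are available; the cost and shapes here are made up.

import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.zeros((3, 2), dtype=theano.config.floatX), name='W')
x = T.matrix('x')
cost = (T.dot(x, W) ** 2).sum()
grads = T.grad(cost, [W])
train_fn = theano.function([x], cost,
                           updates=rmsprop_updates(grads, [W],
                                                   learning_rate=0.01))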
def rmsprop_updates(grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    updates = OrderedDict()

    # Using a Theano constant to prevent upcasting of float32
    one = T.constant(1)
    c = 0
    for param, grad in zip(params, grads):
        print(c)
        value = param.get_value(borrow=True)
        accu = theano.shared(numpy.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        mid_up = param - (learning_rate * grad / (T.sqrt(accu_new + epsilon)))
        try:
            updates[param] = lasagne.updates.norm_constraint(mid_up, 40, 0)
        except:
            updates[param] = mid_up
        c += 1

    return updates