def test_grad_0d(self):
data = numpy.asarray(rand(2, 3), dtype=self.dtype)
n = self.shared(data)
t = n[1, 0]
gn = theano.tensor.grad(theano.tensor.sum(theano.tensor.exp(t)), n)
f = self.function([], gn)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
if not self.fast_compile:
assert_equal(len(topo_), 6)
assert numpy.sum([isinstance(node.op, self.inc_sub)
for node in topo_]) == 1
assert numpy.sum([isinstance(node.op, self.sub)
for node in topo_]) == 1
gval = f()
good = numpy.zeros_like(data)
good[1, 0] = numpy.exp(data[1, 0])
self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_err_bound_list(self):
n = self.shared(numpy.ones((2, 3), dtype=self.dtype) * 5)
l = lvector()
t = n[l]
# We check for AdvancedSubtensor1 here, as the data is transferred to the CPU.
self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
f = self.function([l], t, op=self.adv_sub1)
# the grad
g = self.function([l],
inc_subtensor(t, numpy.asarray([[1.]], self.dtype)),
op=self.adv_incsub1)
for shp in [[0, 4], [0, -3], [-10]]:
self.assertRaises(IndexError, f, shp)
self.assertRaises(IndexError, g, shp)
def test_grad_advanced_inc_subtensor(self):
def inc_slice(*s):
def just_numeric_args(a, b):
cost = (a[s] + b).sum()
cost_wrt_a = theano.tensor.grad(cost, a)
cost_wrt_b = theano.tensor.grad(cost, b)
grads = cost_wrt_a.sum() + cost_wrt_b.sum()
return grads
return just_numeric_args
# vector
utt.verify_grad(
inc_slice(slice(2, 4, None)),
(numpy.asarray([0, 1, 2, 3, 4, 5.]), numpy.asarray([9, 9.]),))
# matrix
utt.verify_grad(
inc_slice(slice(1, 2, None), slice(None, None, None)),
(numpy.asarray([[0, 1], [2, 3], [4, 5.]]),
numpy.asarray([[9, 9.]]),))
# single element
utt.verify_grad(
inc_slice(2, 1),
(numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),))
def test_inc_adv_subtensor_with_broadcasting(self):
if inplace_increment is None:
raise inplace_increment_missing
inc = dscalar()
a = inc_subtensor(self.m[self.ix1, self.ix12], inc)
g_inc = tensor.grad(a.sum(), inc)
assert a.type == self.m.type, (a.type, self.m.type)
f = theano.function([self.m, self.ix1, self.ix12, inc], [a, g_inc],
allow_input_downcast=True)
aval, gval = f([[.4, .9, .1],
[5, 6, 7],
[.5, .3, .15]],
[1, 2, 1],
[0, 1, 0],
2.1)
assert numpy.allclose(aval,
[[.4, .9, .1],
[5 + 2.1 * 2, 6, 7],
[.5, .3 + 2.1, .15]]), aval
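# d(a.sum())/d(inc) == 3: with inplace_increment semantics, `inc` is added
# at three index pairs ((1, 0) twice and (2, 1) once).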
assert numpy.allclose(gval, 3.0), gval
def test_inc_adv_subtensor1_with_broadcasting(self):
if inplace_increment is None:
raise inplace_increment_missing
inc = dscalar()
a = inc_subtensor(self.m[self.ix1], inc)
g_inc = tensor.grad(a.sum(), inc)
assert a.type == self.m.type, (a.type, self.m.type)
f = theano.function([self.m, self.ix1, inc], [a, g_inc],
allow_input_downcast=True)
aval, gval = f([[.4, .9, .1],
[5, 6, 7],
[.5, .3, .15]],
[0, 1, 0],
2.1)
assert numpy.allclose(aval,
[[.4 + 2.1 * 2, .9 + 2.1 * 2, .1 + 2.1 * 2],
[5 + 2.1, 6 + 2.1, 7 + 2.1],
[.5, .3, .15]]), aval
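# d(a.sum())/d(inc) == 9: row 0 is incremented twice and row 1 once, and
# each row has 3 elements, so `inc` is added 9 times in total.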
assert numpy.allclose(gval, 9.0), gval
def test_grad_argmin(self):
data = rand(2, 3)
n = as_tensor_variable(data)
n.name = 'n'
# test grad of argmin
utt.verify_grad(lambda v: argmin(v, axis=-1), [data])
utt.verify_grad(lambda v: argmin(v, axis=[0]), [data])
utt.verify_grad(lambda v: argmin(v, axis=[1]), [data])
utt.verify_grad(lambda v: argmin(v.flatten()), [data])
try:
cost = argmin(n, axis=-1)
cost.name = None
g = grad(cost, n)
raise Exception('Expected an error')
except TypeError:
pass
def test_grad_argmax(self):
data = rand(2, 3)
n = as_tensor_variable(data)
# test grad of argmax
utt.verify_grad(lambda v: argmax(v, axis=-1), [data])
utt.verify_grad(lambda v: argmax(v, axis=[0]), [data])
utt.verify_grad(lambda v: argmax(v, axis=[1]), [data])
utt.verify_grad(lambda v: argmax(v.flatten()), [data])
try:
grad(argmax(n, axis=-1), n)
raise Exception('Expected an error')
except TypeError:
pass
def test_join_matrix_ints(self):
if "float32" in self.shared.__name__:
raise SkipTest(
"The shared variable constructor"
" need to support other dtype then float32")
# Test mixed dtype. There was a bug that caused crash in the past.
av = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='int8')
bv = numpy.array([[7], [8]], dtype='int32')
a = self.shared(av)
b = as_tensor_variable(bv)
s = join(1, a, b)
want = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]], dtype='float32')
out = self.eval_outputs_and_check_join([s])
self.assertTrue((out == want).all())
assert (numpy.asarray(grad(s.sum(), b).eval()) == 0).all()
assert (numpy.asarray(grad(s.sum(), a).eval()) == 0).all()
def test1(self):
s = scal.constant(56)
t = as_tensor_variable(s)
self.assertTrue(t.owner.op is tensor_from_scalar)
self.assertTrue(t.type.broadcastable == (), t.type.broadcastable)
self.assertTrue(t.type.ndim == 0, t.type.ndim)
self.assertTrue(t.type.dtype == s.type.dtype)
v = eval_outputs([t])
self.assertTrue(v == 56, v)
self.assertTrue(isinstance(v, numpy.ndarray))
self.assertTrue(v.shape == (), v.shape)
g = grad(t, s)
self.assertTrue(eval_outputs([g]) == 0.)
def test2(self):
s = scal.constant(56.)
t = as_tensor_variable(s)
self.assertTrue(t.owner.op is tensor_from_scalar)
self.assertTrue(t.type.broadcastable == (), t.type.broadcastable)
self.assertTrue(t.type.ndim == 0, t.type.ndim)
self.assertTrue(t.type.dtype == s.type.dtype)
v = eval_outputs([t])
self.assertTrue(v == 56., v)
self.assertTrue(isinstance(v, numpy.ndarray))
self.assertTrue(v.shape == (), v.shape)
g = grad(t, s)
self.assertTrue(eval_outputs([g]) == 1.)
def test0(self):
tt = constant(56) # scal.constant(56)
ss = scalar_from_tensor(tt)
self.assertTrue(ss.owner.op is scalar_from_tensor)
self.assertTrue(ss.type.dtype == tt.type.dtype)
v = eval_outputs([ss])
self.assertTrue(v == 56, v)
if config.cast_policy == 'custom':
self.assertTrue(isinstance(v, numpy.int16))
elif config.cast_policy in ('numpy', 'numpy+floatX'):
self.assertTrue(isinstance(
v, getattr(numpy, str(numpy.asarray(56).dtype))))
else:
raise NotImplementedError(config.cast_policy)
self.assertTrue(v.shape == (), v.shape)
tt = lscalar()
ss = scalar_from_tensor(tt)
g = ss.owner.op.grad([tt], [ss])
fff = function([tt], ss)
v = fff(numpy.asarray(5))
self.assertTrue(v == 5, v)
self.assertTrue(isinstance(v, numpy.int64))
self.assertTrue(v.shape == (), v.shape)
def test_grad_keep_type(self):
"""Tests that the theano grad method returns a list if it is passed a list
and a single variable if it is passed a single variable.
pylearn2 depends on theano behaving this way. This functionality has been
added three times and erroneously removed twice. If you do anything that
requires changing this test or making it fail you are almost certainly
making a common mistake, NOT fixing something. """
X = tensor.matrix()
y = X.sum()
G = tensor.grad(y, [X])
assert isinstance(G, list)
G = tensor.grad(y, X)
assert not isinstance(G, list)
def test_tile_grad():
def grad_tile(x, reps, np_x):
y = tile(x, reps)
z = y.sum()
g = theano.function([x], grad(z, x))
grad_res = g(np_x)
# The gradient should be the product of the tiling dimensions
# (since the gradients are additive through the tiling operation);
# a standalone numpy check of this claim follows this test.
assert numpy.all(grad_res == numpy.prod(reps))
rng = numpy.random.RandomState(utt.fetch_seed())
# test vector
grad_tile(vector('x'), [3], rng.randn(5).astype(config.floatX))
# test matrix
grad_tile(matrix('x'), [3, 4], rng.randn(2, 3).astype(config.floatX))
# test tensor3
grad_tile(tensor3('x'), [3, 4, 5],
rng.randn(2, 4, 3).astype(config.floatX))
# test tensor4
grad_tile(tensor4('x'), [3, 4, 5, 6],
rng.randn(2, 4, 3, 5).astype(config.floatX))
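# A minimal standalone numpy check of the claim in test_tile_grad above; this
# is a sketch, not part of the original test suite, and the helper name is
# illustrative only.  Since z = tile(x, reps).sum() counts every element of x
# exactly numpy.prod(reps) times, dz/dx is numpy.prod(reps) everywhere, which
# a one-entry finite difference confirms.
def _numpy_tile_grad_sketch():
    import numpy  # the module already imports numpy; repeated to keep this self-contained
    x = numpy.arange(6.).reshape(2, 3)
    reps = (3, 4)
    eps = 1e-3
    x_pert = x.copy()
    x_pert[0, 1] += eps  # perturb a single entry
    finite_diff = (numpy.tile(x_pert, reps).sum() -
                   numpy.tile(x, reps).sum()) / eps
    assert numpy.allclose(finite_diff, numpy.prod(reps))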
def test_broadcast_grad():
# rng = numpy.random.RandomState(utt.fetch_seed())
x1 = T.tensor4('x')
# x1_data = rng.randn(1, 1, 300, 300)
sigma = T.scalar('sigma')
# sigma_data = 20
window_radius = 3
filter_1d = T.arange(-window_radius, window_radius + 1)
filter_1d = filter_1d.astype(theano.config.floatX)
filter_1d = T.exp(-0.5 * filter_1d**2 / sigma ** 2)
filter_1d = filter_1d / filter_1d.sum()
filter_W = filter_1d.dimshuffle(['x', 'x', 0, 'x'])
y = theano.tensor.nnet.conv2d(x1, filter_W, border_mode='full',
filter_shape=[1, 1, None, None])
theano.grad(y.sum(), sigma)
def test_local_softmax_grad_optimization_and_big_input(self):
"""Test the Logsoftmax's grad substitution.
Check that Log(Softmax(x))'s grad is substituted with Logsoftmax(x)'s
grad and that the new operation does not explode for big inputs.
Note that only the grad is checked. (A standalone numpy sketch of the
underlying numerical issue follows this test.)
"""
m = theano.config.mode
m = theano.compile.get_mode(m)
m.check_isfinite = False
# Some inputs that are large enough to make the gradient explode in the
# non-optimized case.
a = numpy.exp(
10 * numpy.random.rand(5, 10).astype(theano.config.floatX))
def myfunc(x):
sm = tensor.nnet.softmax(x)
logsm = tensor.log(sm)
return logsm
# We set step to 0.1 because for big values we need a big epsilon
utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
sa = theano.shared(a)
f = theano.function([], myfunc(sa))
self.assertTrue(check_stack_trace(f, ops_to_check='all'))
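# A minimal numpy-only sketch of the numerical issue behind the Logsoftmax
# substitution checked above; it is not part of the original test suite and
# the helper name is illustrative only.  For large inputs, log(softmax(x))
# computed naively overflows in exp() and yields nan, while the algebraically
# equivalent form x - logsumexp(x) stays finite.
def _numpy_logsoftmax_stability_sketch():
    import numpy  # the module already imports numpy; repeated to keep this self-contained
    x = numpy.array([1000., 1001., 1002.])
    with numpy.errstate(over='ignore', invalid='ignore'):
        naive = numpy.log(numpy.exp(x) / numpy.exp(x).sum())  # exp() overflows -> nan
    m = x.max()
    stable = x - (m + numpy.log(numpy.exp(x - m).sum()))  # x - logsumexp(x)
    assert numpy.isnan(naive).all()
    assert numpy.isfinite(stable).all()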
def test_grad(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and softmaxgrad
w = T.matrix()
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([c, w], T.grad((p_y * w).sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
g_ops = [n.op for n in g.maker.fgraph.toposort()]
# print '--- g ='
# printing.debugprint(g)
# print '==='
raise SkipTest('Optimization not enabled for the moment')
assert len(g_ops) == 2
assert softmax_op in g_ops
assert softmax_grad in g_ops
g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4)))
def test_transpose_basic(self):
# this should be a transposed softmax
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=0)
# test that function contains softmax and no div.
theano.function([c], p_y)
# printing.debugprint(f)
# test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
theano.function([c], T.grad(p_y.sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
# printing.debugprint(g)
raise SkipTest('Optimization not enabled for the moment')
def test_sparseblockgemv_grad_shape(self):
b = tensor.fmatrix()
W = tensor.ftensor4()
h = tensor.ftensor3()
iIdx = tensor.imatrix()
oIdx = tensor.imatrix()
o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
go = theano.grad(o.sum(), [b, W, h])
f = theano.function([W, h, iIdx, b, oIdx], go, mode=self.mode)
W_val, h_val, iIdx_val, b_val, oIdx_val = \
BlockSparse_Gemv_and_Outer.gemv_data()
# Just make sure that it runs correctly and that all the shapes are OK.
b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
assert b_g.shape == b_val.shape
assert h_g.shape == h_val.shape
assert W_g.shape == W_val.shape
def test_only_nonseq_inputs(self):
# Compile the Theano function
n_steps = 2
inp = tensor.matrix()
broadcasted_inp, _ = theano.scan(lambda x: x,
non_sequences=[inp],
n_steps=n_steps)
out = broadcasted_inp.sum()
gr = tensor.grad(out, inp)
fun = theano.function([inp], [broadcasted_inp, gr])
# Execute the Theano function and compare outputs to the expected outputs
inputs = numpy.array([[1, 2], [3, 4]], dtype=theano.config.floatX)
expected_out1 = numpy.repeat(inputs[None], n_steps, axis=0)
expected_out2 = numpy.ones(inputs.shape, dtype="int8") * n_steps
out1, out2 = fun(inputs)
utt.assert_allclose(out1, expected_out1)
utt.assert_allclose(out2, expected_out2)
# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars
def test_verify_second_grad_sitsot(self):
def get_sum_of_grad(inp):
scan_outputs, updates = theano.scan(fn=lambda x: x * 2,
outputs_info=[inp],
n_steps=5)
# Take the gradient of each output wrt its corresponding initial
# state
return theano.grad(scan_outputs.sum(), inp).sum()
# Call verify_grad to ensure the correctness of the second gradients
floatX = theano.config.floatX
inputs_test_values = [numpy.random.random((3)).astype(floatX)]
theano.tests.unittest_tools.verify_grad(get_sum_of_grad,
inputs_test_values)