def test_one_sequence_one_output_weights_gpu2(self):
    """Check a one-sequence, one-output scan compiled with the GPU mode.

    Builds the recurrence ``x_t = u_t * W_in + x_tm1 * W`` with
    ``theano.scan``, compiles it with ``self.mode_with_gpu`` and compares
    the result against the same recurrence computed step by step in numpy.
    The compiled graph is then inspected: the outer graph must contain
    exactly one ``HostFromGpu`` node, four ``GpuFromHost`` nodes and a
    single ``Scan`` node, while the scan's inner graph must contain a
    ``GpuElemwise`` node and no ``HostFromGpu``/``GpuFromHost`` node.
    """

    def f_rnn(u_t, x_tm1, W_in, W):
        # One step of the recurrence.
        return u_t * W_in + x_tm1 * W

    def count_ops(nodes, op_class):
        # Number of nodes whose op is an instance of ``op_class``.
        return sum(1 for node in nodes if isinstance(node.op, op_class))

    def has_op(nodes, op_class):
        # True when at least one node's op is an instance of ``op_class``.
        return any(isinstance(node.op, op_class) for node in nodes)

    # Symbolic inputs of the recurrence.
    u = theano.tensor.fvector('u')
    x0 = theano.tensor.fscalar('x0')
    W_in = theano.tensor.fscalar('win')
    W = theano.tensor.fscalar('w')

    output, updates = theano.scan(
        f_rnn,
        sequences=u,
        outputs_info=x0,
        non_sequences=[W_in, W],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
        mode=self.mode_with_gpu,
    )
    f2 = theano.function(
        [u, x0, W_in, W],
        output,
        updates=updates,
        allow_input_downcast=True,
        mode=self.mode_with_gpu,
    )

    # get random initial values
    # (the draw order is kept as-is so the seeded values do not change)
    rng = numpy.random.RandomState(utt.fetch_seed())
    v_u = rng.uniform(size=(4,), low=-5., high=5.)
    v_x0 = rng.uniform()
    v_w = rng.uniform()
    v_w_in = rng.uniform()

    # compute the output in numpy
    v_out = numpy.zeros((4,))
    previous = v_x0
    for step in xrange(4):
        v_out[step] = v_u[step] * v_w_in + previous * v_w
        previous = v_out[step]

    theano_values = f2(v_u, v_x0, v_w_in, v_w)
    utt.assert_allclose(theano_values, v_out)

    # Transfer nodes expected in the outer (compiled function) graph.
    topo = f2.maker.fgraph.toposort()
    assert count_ops(topo, self.gpu_backend.HostFromGpu) == 1
    assert count_ops(topo, self.gpu_backend.GpuFromHost) == 4

    scan_nodes = [
        node for node in topo
        if isinstance(node.op, theano.scan_module.scan_op.Scan)
    ]
    assert len(scan_nodes) == 1
    scan_node_topo = scan_nodes[0].op.fn.maker.fgraph.toposort()

    # check that there is no gpu transfer in the inner loop.
    assert has_op(scan_node_topo, self.gpu_backend.GpuElemwise)
    assert not has_op(scan_node_topo, self.gpu_backend.HostFromGpu)
    assert not has_op(scan_node_topo, self.gpu_backend.GpuFromHost)
# This third test checks that scan can deal with a mixture of dtypes as
# outputs when it is running on the GPU
评论列表
文章目录