def gru_layer(tparams, state_below, init_state, options, prefix='gru', mask=None, **kwargs):
    """
    Feedforward pass through GRU.

    The input projections are computed once for the whole sequence; the
    recurrence itself is then unrolled over axis 0 with ``theano.scan``.

    Parameters
    ----------
    tparams : mapping
        Layer parameters, read as ``tparams[_p(prefix, name)]`` for ``name``
        in ``'W'``, ``'b'``, ``'Wx'``, ``'bx'``, ``'U'`` and ``'Ux'``.
    state_below : tensor
        Input sequence. Axis 0 is the axis scanned over. When the tensor is
        3-dimensional, axis 1 is taken as the number of samples; otherwise a
        single sample is assumed.
    init_state : tensor or None
        Initial hidden state. ``None`` selects an all-zero state of shape
        ``(n_samples, dim)``, where ``dim`` is the size of axis 1 of ``Ux``.
    options
        Not used by this function.
    prefix : str
        Prefix passed to ``_p`` to build the parameter names.
    mask : tensor or None
        Per-step mask: where it is 1 the new state is kept, where it is 0 the
        previous state is carried over. ``None`` selects an all-ones mask.
        Presumably shaped (steps, samples) -- confirm against callers.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    list
        A one-element list holding the scan output (the hidden state
        produced at every step).
    """
    nsteps = state_below.shape[0]
    n_samples = state_below.shape[1] if state_below.ndim == 3 else 1

    # Recurrent weights are bound once and reused for both the size lookup
    # and the scan's non_sequences.
    Ux = tparams[_p(prefix, 'Ux')]
    U = tparams[_p(prefix, 'U')]
    dim = Ux.shape[1]

    # Identity checks: `== None` would dispatch to an overloaded __eq__ on
    # array-like arguments instead of testing for the missing-value sentinel.
    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)
    if mask is None:
        mask = tensor.alloc(1., nsteps, 1)

    def _slice(_x, n, dim):
        """Return the n-th block of width ``dim`` along the last axis."""
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # Input contributions for the two gates (W, b) and for the candidate
    # state (Wx, bx), computed for all steps at once outside the scan.
    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
        """One recurrence step: previous state ``h_`` -> new state."""
        preact = tensor.dot(h_, U)
        preact += x_

        # Block 0 of the pre-activation is the gate applied to the recurrent
        # term; block 1 is the gate that mixes old state and candidate.
        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
        u = tensor.nnet.sigmoid(_slice(preact, 1, dim))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)

        # Interpolate between the previous state and the candidate.
        h = u * h_ + (1. - u) * h
        # Masked positions (m_ == 0) keep the previous state unchanged.
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]

    # strict=True: every shared variable used inside the step must be passed
    # in explicitly, hence U and Ux as non_sequences.
    rval, _updates = theano.scan(_step_slice,
                                 sequences=seqs,
                                 outputs_info=[init_state],
                                 non_sequences=[U, Ux],
                                 name=_p(prefix, '_layers'),
                                 n_steps=nsteps,
                                 profile=False,
                                 strict=True)
    return [rval]
评论列表
文章目录