def _forward_step(self, x_t, s_t):
"""Input vector/matrix x(t) and state matrix s(t)."""
    # Clip gradients to [-5, 5] on the backward pass (grad_clip is the
    # identity on the forward pass); unpacking assumes self.params keeps
    # a fixed insertion order
    E, a, U, W, b, V, c = [th.gradient.grad_clip(p, -5.0, 5.0)
                           for p in self.params.values()]
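    # Parameter roles, as inferred from their use below: E/a embed the
    # vocabulary input, U/W/b hold the per-gate input, recurrent, and bias
    # parameters for every GRU layer, and V/c project the final state to
    # the output.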
# Initialize state to return
s_next = T.zeros_like(s_t)
# Vocab-to-state encoding layer
inout = T.tanh(T.dot(x_t, E) + a)
# Loop over GRU layers
for layer in range(self.hyper.layers):
        # Three consecutive parameter slices per layer:
        # L (update gate), L+1 (reset gate), L+2 (candidate state)
L = layer * 3
# Get previous state for this layer
s_prev = s_t[layer]
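        # GRU recurrence computed below (* is elementwise):
        #   z = hard_sigmoid(x U_z + s_prev W_z + b_z)    update gate
        #   r = hard_sigmoid(x U_r + s_prev W_r + b_r)    reset gate
        #   h = tanh(x U_h + (r * s_prev) W_h + b_h)      candidate state
        #   s_new = (1 - z) * h + z * s_prev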
# Update gate
z = T.nnet.hard_sigmoid(T.dot(inout, U[L]) + T.dot(s_prev, W[L]) + b[L])
# Reset gate
r = T.nnet.hard_sigmoid(T.dot(inout, U[L+1]) + T.dot(s_prev, W[L+1]) + b[L+1])
# Candidate state
h = T.tanh(T.dot(inout, U[L+2]) + T.dot(r * s_prev, W[L+2]) + b[L+2])
# New state
s_new = (T.ones_like(z) - z) * h + z * s_prev
s_next = T.set_subtensor(s_next[layer], s_new)
# Update for next layer or final output (might add dropout here later)
inout = s_new
# Final output
o_t = T.dot(inout, V) + c
return o_t, s_next
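
# A minimal usage sketch, not part of the original file: how _forward_step
# could be threaded over a full input sequence with theano.scan. The names
# X (matrix with one x_t row per timestep) and s0 (initial state, one row
# per layer) are hypothetical, and `th` is assumed to be `import theano as th`.
def _forward_sequence(self, X, s0):
    """Apply _forward_step at every timestep of X, threading the state."""
    (o, s), _ = th.scan(
        self._forward_step,
        sequences=X,              # supplies x_t at each step
        outputs_info=[None, s0],  # o_t is not fed back; s0 seeds s_t
    )
    return o, s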