def get_output_for(self, inputs, **kwargs):
    """Run an answer-decoding GRU for ``self.max_answer_word`` steps.

    Standard GRU recurrence, except that the softmax prediction produced at
    each step is concatenated with the question encoding ``q`` and fed back
    in as the next step's input.

    Parameters (unpacked from ``inputs``):
        inputs[0]: question encoding ``q``
            # assumes shape (batch, hid_state_size) -- TODO confirm with caller
        inputs[1]: episodic memory ``m``, used as the initial hidden state
        inputs[2]: dropout probability applied to ``m``

    Returns:
        Per-step softmax outputs transposed from (n_steps, batch, n_out)
        to (batch, n_steps, n_out).
    """
    q = inputs[0]
    m = inputs[1]
    epmem_dropout = inputs[2]

    # Dropout is applied to the episodic memory only; the same dropout on q
    # was deliberately disabled in the original code.
    m = m * self.rand_stream.binomial(m.shape, p=1 - epmem_dropout,
                                      dtype=theano.config.floatX)

    # Stack the three gates' parameters so each scan step needs only one
    # dot product per source (input / hidden).
    W_in_stacked = T.concatenate([self.W_in_to_resetgate,
                                  self.W_in_to_updategate,
                                  self.W_in_to_hid_update], axis=1)
    W_hid_stacked = T.concatenate([self.W_hid_to_resetgate,
                                   self.W_hid_to_updategate,
                                   self.W_hid_to_hid_update], axis=1)
    b_stacked = T.concatenate([self.b_resetgate,
                               self.b_updategate,
                               self.b_hid_update], axis=0)

    def slice_w(x, n):
        # n-th gate slice of a stacked (batch, 3*hid_state_size) matrix.
        return x[:, n * self.hid_state_size:(n + 1) * self.hid_state_size]

    def get_output(a):
        # Softmax prediction layer applied to a hidden state.
        return nonlin.softmax(T.dot(a, self.W))

    def step(hid_previous, out_previous, *args):
        # Feed the previous prediction back in, alongside the question.
        input_n = T.concatenate([out_previous, q], axis=1)
        hid_input = T.dot(hid_previous, W_hid_stacked)
        input_n = T.dot(input_n, W_in_stacked) + b_stacked

        resetgate = self.nonlinearity_resetgate(
            slice_w(hid_input, 0) + slice_w(input_n, 0))
        updategate = self.nonlinearity_updategate(
            slice_w(hid_input, 1) + slice_w(input_n, 1))

        # Candidate hidden state; reset gate modulates the recurrent part.
        hid_update = self.nonlinearity_hid(
            slice_w(input_n, 2) + resetgate * slice_w(hid_input, 2))

        # BUG FIX: the original read `updategate + hid_update` (addition).
        # The GRU update is a convex combination, so the candidate must be
        # *multiplied* by the update gate.
        hid = (1 - updategate) * hid_previous + updategate * hid_update
        out = get_output(hid)
        return (hid, out)

    non_seqs = [W_in_stacked, b_stacked, W_hid_stacked, q, m, self.W]
    # Initial hidden state is the episodic memory m; the initial "previous
    # output" is the prediction computed from m itself. The scan updates
    # dict is unused (no shared-variable updates here).
    hid_and_out, _updates = theano.scan(
        fn=step,
        outputs_info=[m, get_output(m)],
        non_sequences=non_seqs,
        strict=True,
        n_steps=self.max_answer_word)

    # hid_and_out[1] is the output sequence: (n_steps, batch, n_out)
    # -> (batch, n_steps, n_out).
    return T.transpose(hid_and_out[1], (1, 0, 2))
# NOTE(review): the two lines below were scraped-page residue ("comment
# list" / "article table of contents") and not part of the program; kept
# here as a comment so the module remains valid Python.
# 评论列表
# 文章目录