def forward(self, ws, ss, ps, ls, dep_ts=None):
    """Score dependency arcs and supertag categories for a batch.

    Runs word/suffix/prefix embeddings through a bi-directional LSTM,
    then two biaffine classifiers: one over (dependent, head) pairs for
    arcs, one over (child, selected-head) pairs for categories.

    Args:
        ws: word-id matrix of shape (batchsize, slen).
        ss: suffix ids — assumed 4 affix ids per token (reshaped to
            4 * afix_dim below); TODO confirm against the data loader.
        ps: prefix ids — same assumed layout as ``ss``.
        ls: iterable of true (unpadded) sentence lengths, one per batch
            item, used to trim the padded outputs.
        dep_ts: optional gold head indices. When given, they are used as
            the heads for the category classifier with probability 0.5
            (scheduled sampling); otherwise predicted heads are used.

    Returns:
        Tuple ``(cat_ys, dep_ys)`` of per-sentence score lists:
        ``cat_ys[i]`` has its first axis trimmed to ``ls[i]``;
        ``dep_ys[i]`` is trimmed to ``(ls[i], ls[i])``.
    """
    batchsize, slen = ws.shape
    xp = chainer.cuda.get_array_module(ws[0])
    wss = self.emb_word(ws)
    # 4 affix embeddings per token are flattened into one feature vector
    sss = F.reshape(self.emb_suf(ss), (batchsize, slen, 4 * self.afix_dim))
    pss = F.reshape(self.emb_prf(ps), (batchsize, slen, 4 * self.afix_dim))
    ins = F.dropout(F.concat([wss, sss, pss], 2), self.dropout_ratio, train=self.train)
    # Time-major layout for the LSTMs; the backward LSTM consumes the
    # sequence reversed along the time axis.
    xs_f = F.transpose(ins, (1, 0, 2))
    xs_b = xs_f[::-1]
    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    # (batch, length, hidden_dim): re-align the reversed backward states
    # before concatenating with the forward states.
    hs = F.transpose(F.concat([hs_f, hs_b[::-1]], 2), (1, 0, 2))
    dep_ys = self.biaffine_arc(
        F.elu(F.dropout(self.arc_dep(hs), 0.32, train=self.train)),
        F.elu(F.dropout(self.arc_head(hs), 0.32, train=self.train)))
    # Scheduled sampling: use gold heads roughly half the time when given.
    # BUG FIX: the original compared the *function object* `random.random`
    # to 0.5 instead of calling it — a TypeError on Python 3, and the
    # branch choice was never actually random.
    if dep_ts is not None and random.random() >= 0.5:
        heads = dep_ts
    else:
        # Predicted head index per token, offset so each index addresses
        # the flattened (batchsize * slen) axis of `hs` below.
        heads = F.flatten(F.argmax(dep_ys, axis=2)) + \
            xp.repeat(xp.arange(0, batchsize * slen, slen), slen)
    hs = F.reshape(hs, (batchsize * slen, -1))
    # Gather each token's head representation via permutation by `heads`.
    heads = F.permutate(
        F.elu(F.dropout(
            self.rel_head(hs), 0.32, train=self.train)), heads)
    childs = F.elu(F.dropout(self.rel_dep(hs), 0.32, train=self.train))
    cat_ys = self.biaffine_tag(childs, heads)
    # Split back into per-sentence pieces (split_axis needs batchsize > 1)
    # and trim padding down to each sentence's true length.
    dep_ys = F.split_axis(dep_ys, batchsize, 0) if batchsize > 1 else [dep_ys]
    dep_ys = [F.reshape(v, v.shape[1:])[:l, :l] for v, l in zip(dep_ys, ls)]
    cat_ys = F.split_axis(cat_ys, batchsize, 0) if batchsize > 1 else [cat_ys]
    cat_ys = [v[:l] for v, l in zip(cat_ys, ls)]
    return cat_ys, dep_ys
# (removed stray non-code page-scrape residue that was not valid Python)