import numpy as np
import chainer.functions as F

def sequence_embed(embed, xs):
    # Embed a batch of variable-length ID sequences in a single call,
    # then split the result back into one array per sequence.
    x_len = [len(x) for x in xs]
    x_section = np.cumsum(x_len[:-1])
    ex = embed(F.concat(xs, axis=0))
    exs = F.split_axis(ex, x_section, 0)
    return exs
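
# A minimal sketch of how this helper might be called; the vocabulary size,
# embedding width, and the two toy sentences below are made up for illustration.
import chainer.links as L

embed = L.EmbedID(100, 16)
xs = [np.array([1, 5, 7], dtype=np.int32),
      np.array([2, 3], dtype=np.int32)]
exs = sequence_embed(embed, xs)
# exs is a tuple of Variables with shapes (3, 16) and (2, 16),
# one entry per input sentence.
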
def compute_vecs(self, word_ids, word_boundaries, phrase_num,
                 char_vecs=None):
    word_ids = my_variable(word_ids, volatile=not self.train)
    word_embs = self.emb(word_ids)  # total_len x dim
    word_embs_reshape = F.reshape(word_embs, (1, 1, -1, self.emb_dim))
    if self.word_level_flag and char_vecs is not None:
        # print(char_vecs.data.shape)
        # print(word_embs.data.shape)
        word_embs = F.concat([word_embs, char_vecs], axis=1)
        # print(word_embs.data.shape)
        dim = self.emb_dim + self.add_dim
        word_embs_reshape = F.reshape(word_embs, (1, 1, -1, dim))
    # 1 x 1 x total_len x dim
    # convolution
    word_emb_conv = self.conv(word_embs_reshape)
    # 1 x dim x total_len x 1
    word_emb_conv_reshape = F.reshape(word_emb_conv,
                                      (self.hidden_dim, -1))
    # split at word boundaries and max-pool each word segment
    # (only the odd-indexed pieces correspond to words)
    word_emb_conv_reshape = F.split_axis(word_emb_conv_reshape,
                                         word_boundaries, axis=1)
    embs = [F.max(word_emb_conv_word, axis=1)
            for i, word_emb_conv_word in enumerate(word_emb_conv_reshape)
            if i % 2 == 1]
    embs = F.concat(embs, axis=0)
    phrase_emb_conv = F.reshape(embs,
                                (phrase_num, self.hidden_dim))
    return phrase_emb_conv
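
# Standalone sketch of the split-then-pool pattern above (all sizes and
# boundary positions below are made up for illustration): a
# (hidden_dim, total_len) matrix is cut at boundary positions and the
# odd-indexed segments are max-pooled into one vector per word.
import numpy as np
import chainer.functions as F

h = np.random.rand(4, 10).astype(np.float32)   # hidden_dim=4, total_len=10
boundaries = [2, 5, 7, 9]                       # alternating gap / word segments
segments = F.split_axis(h, boundaries, axis=1)
word_vecs = [F.max(seg, axis=1) for i, seg in enumerate(segments) if i % 2 == 1]
# two word vectors, each of shape (4,)
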
def forward(self, ws, cs):
    batchsize, length, max_word_len = cs.shape
    ws = self.emb_word(ws)  # (batch, length, word_dim)
    cs = F.reshape(
        F.max_pooling_2d(
            self.conv_char(
                F.reshape(
                    self.emb_char(cs),
                    (batchsize * length, 1, max_word_len, 50))), (max_word_len, 1)),
        (batchsize, length, self.char_dim))
    hs = F.transpose(F.concat([ws, cs], 2), (1, 0, 2))
    hs = F.dropout(hs, self.dropout_ratio, train=self.train)
    hs = F.split_axis(hs, length, 0)
    hs_f = []
    hs_b = []
    self._init_state()
    for h_in_f, h_in_b in zip(hs, reversed(hs)):
        h_f = self.lstm_f2(self.lstm_f1(F.reshape(h_in_f, (batchsize, -1))))
        hs_f.append(h_f)
        h_b = self.lstm_b2(self.lstm_b1(F.reshape(h_in_b, (batchsize, -1))))
        hs_b.append(h_b)
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, reversed(hs_b))]
    cat_ys = [self.linear_cat2(F.dropout(
        F.elu(self.linear_cat1(h)), 0.5, train=self.train)) for h in hs]
    hs = [F.reshape(h, (length, -1)) for h in
          F.split_axis(F.transpose(F.stack(hs, 2), (0, 2, 1)), batchsize, 0)]
    dep_ys = [self.biaffine(
        F.relu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
        F.relu(F.dropout(self.linear_head(h), 0.32, train=self.train))) for h in hs]
    return cat_ys, dep_ys
def __call__(self, ws, cs, cat_ts, dep_ts):
    batchsize, length = cat_ts.shape
    cat_ys, dep_ys = self.forward(ws, cs)
    cat_ys = cat_ys[1:-1]
    cat_ts = [F.reshape(x, (batchsize,)) for x
              in F.split_axis(F.transpose(cat_ts), length, 0)]
    assert len(cat_ys) == len(cat_ts)
    cat_loss = reduce(lambda x, y: x + y,
                      [F.softmax_cross_entropy(y, t) for y, t in zip(cat_ys, cat_ts)])
    cat_acc = reduce(lambda x, y: x + y,
                     [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(cat_ys, cat_ts)])
    # hs [(length, hidden_dim), ...]
    dep_ys = [x[1:-1] for x in dep_ys]
    dep_ts = [F.reshape(x, (length,)) for x in F.split_axis(dep_ts, batchsize, 0)]
    dep_loss = reduce(lambda x, y: x + y,
                      [F.softmax_cross_entropy(y, t) for y, t in zip(dep_ys, dep_ts)])
    dep_acc = reduce(lambda x, y: x + y,
                     [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(dep_ys, dep_ts)])
    cat_acc /= length
    dep_acc /= batchsize
    chainer.report({
        "tagging_loss": cat_loss,
        "tagging_accuracy": cat_acc,
        "parsing_loss": dep_loss,
        "parsing_accuracy": dep_acc
    }, self)
    return cat_loss + dep_loss
def forward(self, ws, ss, ps, dep_ts=None):
    batchsize = len(ws)
    xp = chainer.cuda.get_array_module(ws[0])
    split = scanl(lambda x, y: x + y, 0, [w.shape[0] for w in ws])[1:-1]
    wss = self.emb_word(F.hstack(ws))
    sss = F.reshape(self.emb_suf(F.vstack(ss)), (-1, 4 * self.afix_dim))
    pss = F.reshape(self.emb_prf(F.vstack(ps)), (-1, 4 * self.afix_dim))
    ins = F.dropout(F.concat([wss, sss, pss]), self.dropout_ratio, train=self.train)
    xs_f = list(F.split_axis(ins, split, 0))
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of category)]
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]
    dep_ys = [self.biaffine_arc(
        F.elu(F.dropout(self.arc_dep(h), 0.32, train=self.train)),
        F.elu(F.dropout(self.arc_head(h), 0.32, train=self.train))) for h in hs]
    # if dep_ts is not None and random.random >= 0.5:
    if dep_ts is not None:
        heads = dep_ts
    else:
        heads = [F.argmax(y, axis=1) for y in dep_ys]
    heads = F.elu(F.dropout(
        self.rel_head(
            F.vstack([F.embed_id(t, h, ignore_label=IGNORE)
                      for h, t in zip(hs, heads)])),
        0.32, train=self.train))
    childs = F.elu(F.dropout(self.rel_dep(F.vstack(hs)), 0.32, train=self.train))
    cat_ys = self.biaffine_tag(childs, heads)
    cat_ys = list(F.split_axis(cat_ys, split, 0))
    return cat_ys, dep_ys
def set_state(self, state):
    self.x, self.c, self.h = F.split_axis(
        state, (self.in_size, self.in_size + self.size), axis=1)
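
# Toy illustration of the section-index form of split_axis used above
# (the in_size and size values are made up): a packed
# (batch, in_size + in_size + size) state matrix is cut into x, c, h along axis 1.
import numpy as np
import chainer.functions as F

in_size, size = 3, 4
state = np.zeros((2, in_size + in_size + size), dtype=np.float32)
x, c, h = F.split_axis(state, (in_size, in_size + size), axis=1)
# shapes: x (2, 3), c (2, 3), h (2, 4)
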
def predict(self, xs):
    """
    xs: list of tokenized sentences
    """
    batchsize = len(xs)
    fs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(fs, padding=-1)
    ys = self.forward(ws, ss, ps)
    ys = F.transpose(F.stack(ys, 2), (0, 2, 1))
    return [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in
            zip(xs, F.split_axis(ys, batchsize, 0))]
def __call__(self, ws, ss, ps, cat_ts, dep_ts):
    """
    xs [(w, s, p, y), ...]
    w: word, s: suffix, p: prefix, y: label
    """
    batchsize, length = ws.shape
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_ts = [F.reshape(x, (batchsize,)) for x
              in F.split_axis(F.transpose(cat_ts), length, 0)]
    dep_ts = [F.reshape(x, (batchsize,)) for x
              in F.split_axis(F.transpose(dep_ts), length, 0)]
    cat_loss = reduce(lambda x, y: x + y,
                      [F.softmax_cross_entropy(y, t) for y, t in zip(cat_ys, cat_ts)])
    cat_acc = reduce(lambda x, y: x + y,
                     [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(cat_ys, cat_ts)])
    dep_loss = reduce(lambda x, y: x + y,
                      [F.softmax_cross_entropy(y, t) for y, t in zip(dep_ys, dep_ts)])
    dep_acc = reduce(lambda x, y: x + y,
                     [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(dep_ys, dep_ts)])
    cat_acc /= length
    dep_acc /= length
    chainer.report({
        "tagging_loss": cat_loss,
        "tagging_accuracy": cat_acc,
        "parsing_loss": dep_loss,
        "parsing_accuracy": dep_acc
    }, self)
    return cat_loss + dep_loss
def embed(self, source, train=True):
    xp = self.xp
    mask = xp.expand_dims(source != -1, -1)
    self.mask = chainer.Variable(mask, volatile=not train)
    x = chainer.Variable(source, volatile=not train)
    embs = self.src_emb(x)
    embs = F.split_axis(embs, embs.data.shape[1], 1)
    return embs
def __call__(self, source, target, lengths=None, train=True):
    self.batchsize, self.source_length = source.shape
    state = self.encode(source, train=train)
    state = self.prepare_decoding(state, lengths, train=train)
    y = None
    if target is not None:
        y = chainer.Variable(target, volatile=not train)
        y = F.split_axis(y, y.data.shape[1], 1)
    outs, loss = self.decode(state, y, train=train)
    return outs, loss
def iaf(self, z, h, lin1, lin2):
    # one inverse autoregressive flow step: predict a shift m and a gate s,
    # update z, and return the log-determinant term of the transform
    ms = F.crelu(lin1(F.concat((z, h), axis=1)))
    ms = lin2(ms)
    m, s = F.split_axis(ms, 2, axis=1)
    s = F.sigmoid(s)
    z = s * z + (1 - s) * m
    # pdb.set_trace()
    return z, -F.sum(F.log(s), axis=1)
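
# A minimal standalone check of the shape contract for the IAF step above;
# all dimensions and the random inputs are made up, and iaf is called as a
# plain function here because it does not use self.
import numpy as np
import chainer.links as L

z_dim, h_dim = 8, 8
lin1 = L.Linear(z_dim + h_dim, 32)   # crelu doubles 32 -> 64 features
lin2 = L.Linear(64, 2 * z_dim)       # split into shift m and gate s
z = np.random.randn(4, z_dim).astype(np.float32)
h = np.random.randn(4, h_dim).astype(np.float32)
z_new, log_det = iaf(None, z, h, lin1, lin2)
# z_new: (4, 8), log_det: (4,)
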
def __call__(self, X):
    pad = self._kernel_size[1] - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[..., :-pad]   # drop the right padding to keep the convolution causal
    A, B = functions.split_axis(WX, 2, axis=1)
    H = A * functions.sigmoid(B)   # gated linear unit
    return H
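
# Toy check of the channel split above (batch, channel, and length sizes are
# made up): a (N, 2*C, T) convolution output is cut into halves A and B along
# the channel axis and combined as a gated linear unit.
import numpy as np
import chainer.functions as F

WX = np.random.randn(2, 8, 5).astype(np.float32)
A, B = F.split_axis(WX, 2, axis=1)   # each (2, 4, 5)
H = A * F.sigmoid(B)
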
def __call__(self, x, split_into_variables=True, discard_context=False):
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    # conv
    out_data = self.conv_blocks(x)
    out_data = functions.reshape(out_data, (batchsize, -1, seq_length))
    # rnn
    for index, blocks in enumerate(self.rnn_blocks.blocks):
        sru = blocks[0]
        dropout = blocks[1] if len(blocks) == 2 else None
        hidden, cell, context = sru(out_data, self.contexts[index])
        if discard_context is False:
            self.contexts[index] = context
        out_data = hidden
        if dropout is not None:
            out_data = dropout(out_data)
    # fc
    out_data = self.dense_blocks(out_data)
    assert out_data.shape[2] == seq_length
    # when using CTC, the RNN output must be split into per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.squeeze(out_data, axis=2)
    return out_data
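
# Toy illustration of the per-timestep split performed above when
# split_into_variables is True (batch, feature, and time sizes are made up):
# a (batchsize, seq_length * feat) matrix is cut into seq_length Variables
# of shape (batchsize, feat), one per frame.
import numpy as np
import chainer.functions as F

batchsize, feat, seq_length = 2, 3, 5
out = np.random.randn(batchsize, seq_length * feat).astype(np.float32)
frames = F.split_axis(out, seq_length, axis=1)
# tuple of 5 Variables, each of shape (2, 3)
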
def __call__(self, X, skip_mask=None):
    # remove right paddings
    # e.g.
    # kernel_size = 3
    # pad = 2
    # input sequence with paddings:
    # [0, 0, x1, x2, x3, 0, 0]
    # |< t1 >|
    #    |< t2 >|
    #       |< t3 >|
    pad = self._kernel_size - 1
    WX = self.W(X)[..., :-pad]
    return self.pool(functions.split_axis(WX, self.num_split, axis=1), skip_mask=skip_mask)
def forward_one_step(self, X, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad - 1, None]
    return self.pool(functions.split_axis(WX, self.num_split, axis=1), skip_mask=skip_mask)
def forward_one_step(self, X, ht_enc):
    pad = self._kernel_size - 1
    WX = self.W(X)[..., -pad - 1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
    return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def check_forward(self, x_data, ys_data, indices_or_sections, axis):
    x = chainer.Variable(x_data)
    ys = functions.split_axis(x, indices_or_sections, axis)
    for yd, y in zip(ys_data, ys):
        self.assertEqual(y.data.dtype, self.dtype)
        self.assertIsInstance(y.data.shape, tuple)
        gradient_check.assert_allclose(yd, y.data, atol=0, rtol=0)