def forward(self, pretrained_word_tokens, word_tokens, pos_tokens):
    X = []
    batch = len(word_tokens)
    for i in range(batch):
        xs_words_pretrained = \
            self.embed[0](self.xp.array(pretrained_word_tokens[i]))
        xs_words = self.embed[1](self.xp.array(word_tokens[i]))
        xs_words += xs_words_pretrained
        xs_tags = self.embed[2](self.xp.array(pos_tokens[i]))
        xs = F.concat([
            teras_F.dropout(xs_words, self.embed._dropout_ratio),
            teras_F.dropout(xs_tags, self.embed._dropout_ratio)])
        X.append(xs)
    R = self.blstm(X)
    R = F.pad_sequence(R)
    H_arc_dep = self.mlp_arc_dep(R)
    H_arc_head = self.mlp_arc_head(R)
    arc_logits = self.arc_biaffine(H_arc_dep, H_arc_head)
    arc_logits = F.squeeze(arc_logits, axis=3)
    H_label_dep = self.mlp_label_dep(R)
    H_label_head = self.mlp_label_head(R)
    label_logits = self.label_biaffine(H_label_dep, H_label_head)
    return arc_logits, label_logits
Example source code for Python squeeze()
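Before the project snippets below, a minimal self-contained sketch of what F.squeeze does (the shapes here are only illustrative):

import numpy as np
import chainer.functions as F

x = np.zeros((4, 1, 7, 1), dtype=np.float32)
# Remove every size-1 axis:
print(F.squeeze(x).shape)          # (4, 7)
# Remove only one specific size-1 axis:
print(F.squeeze(x, axis=1).shape)  # (4, 7, 1)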
def predict(self, xs):
    """
    batch: list of split sentences
    """
    xs = [self.extractor.process(x) for x in xs]
    batchsize = len(xs)
    ws, cs, ls = zip(*xs)
    ws = map(self.emb_word, ws)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
        for c, l in zip(cs, ls)]
    xs_f = [F.dropout(F.concat([w, c]),
        self.dropout_ratio, train=self.train) for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, hs_b)]
    return [y.data[1:-1] for y in ys]
def __call__(self, ws, ss, ps, ts):
    """
    xs [(w, s, p, y), ...]
    w: word, s: suffix, p: prefix, y: label
    """
    batchsize, length = ts.shape
    ys = self.forward(ws, ss, ps)[1:-1]
    ts = [F.squeeze(x, 0) for x in F.split_axis(F.transpose(ts), length, 0)]
    loss = reduce(lambda x, y: x + y,
        [F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)])
    acc = reduce(lambda x, y: x + y,
        [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)])
    acc /= length
    chainer.report({
        "loss": loss,
        "accuracy": acc
    }, self)
    return loss
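The target handling above transposes the (batchsize, length) label matrix and splits it into one (batchsize,) vector per position; a small self-contained illustration of that reshaping with dummy data (not the project's own):

import numpy as np
import chainer.functions as F

ts = np.arange(6, dtype=np.int32).reshape(2, 3)   # (batchsize, length)
cols = [F.squeeze(x, 0)                            # each: (batchsize,)
        for x in F.split_axis(F.transpose(ts), 3, 0)]
print([c.data for c in cols])  # columns of ts: [0, 3], [1, 4], [2, 5]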
def predict(self, xs):
    """
    batch: list of split sentences
    """
    batchsize = len(xs)
    xs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(xs, padding=IGNORE)
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
    dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))
    cat_ys = [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in \
            zip(xs, F.split_axis(cat_ys, batchsize, 0))]
    dep_ys = [F.squeeze(F.log_softmax(y[1:len(x) + 1, :-1]), 0).data \
            for x, y in zip(xs, F.split_axis(dep_ys, batchsize, 0))]
    return cat_ys, dep_ys
def __call__(self, x, split_into_variables=True):
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    out_data = super(AcousticModel, self).__call__(x)
    assert out_data.shape[3] == seq_length
    # When computing CTC loss, split the RNN output into a list of per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.squeeze(out_data, axis=2)
    return out_data
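For context, the list of per-timestep Variables produced by the split_into_variables branch above is the input layout Chainer's CTC loss expects; a minimal sketch with dummy data (the sizes and labels are assumptions, not from the original project):

import numpy as np
import chainer.functions as F

batchsize, seq_length, vocab, blank = 2, 5, 4, 0
# length-T list of (batchsize, vocab) arrays, mirroring the return value above
x_list = [np.random.randn(batchsize, vocab).astype(np.float32)
          for _ in range(seq_length)]
t = np.array([[1, 2], [3, 1]], dtype=np.int32)  # label sequences, blank excluded
loss = F.connectionist_temporal_classification(x_list, t, blank_symbol=blank)
print(loss.shape)  # () -- scalar mean loss over the batch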
def __call__(self, x, z, ze, mask, conv_mask):
    att_scale = self.xp.sum(
        mask, axis=2, keepdims=True)[:, None, :, :] ** 0.5
    pad = self.xp.zeros(
        (x.shape[0], x.shape[1], self.width - 1, 1), dtype=x.dtype)
    base_x = x
    z = F.squeeze(z, axis=3)
    # Note: the behavior of input, output, and attention here
    # may follow the authors' code, which looks a little different
    # from what the paper describes.
    for conv_name, preatt_name in zip(self.conv_names, self.preatt_names):
        # Calculate output of GLU
        out = getattr(self, conv_name)(
            F.concat([pad, x], axis=2), conv_mask)
        # Calculate output of attention using output of GLU
        preatt = seq_linear(getattr(self, preatt_name), out)
        query = base_x + preatt
        query = F.squeeze(query, axis=3)
        c = self.attend(query, z, ze, mask) * att_scale
        # Merge them in residual calculation and scaling
        x = (x + (c + out) * scale05) * scale05
    return x
def __call__(self):
    mem_optimize = nmtrain.optimization.chainer_mem_optimize
    # Calculate attention vector
    a = self.attention(self.S, self.h)
    # Calculate context vector
    c = F.squeeze(F.batch_matmul(self.S, a, transa=True), axis=2)
    # Calculate hidden vector + context
    self.ht = self.context_project(F.concat((self.h, c), axis=1))
    # Calculate word probability distribution
    y = mem_optimize(self.affine_vocab, F.tanh(self.ht), level=1)
    if self.use_lexicon:
        y = self.lexicon_model(y, a, self.ht, self.lexicon_matrix)
    if nmtrain.environment.is_train():
        return nmtrain.models.decoders.Output(y=y)
    else:
        # Return the vocabulary-size output projection
        return nmtrain.models.decoders.Output(y=y, a=a)
def __call__(self, y, a, ht, y_lex):
    y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
    return (y + F.log(y_dict + self.alpha))
#class LinearInterpolationLexicon(chainer.Chain):
#    def __init__(self, hidden_size):
#        super(LinearInterpolationLexicon, self).__init__(
#            perceptron = chainer.links.Linear(hidden_size, 1)
#        )
#
#    def __call__(self, y, a, ht, y_lex):
#        y = F.softmax(y)
#        y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
#        gamma = F.broadcast_to(F.sigmoid(self.perceptron(ht)), y_dict.data.shape)
#        return (gamma * y_dict + (1 - gamma) * y)
#
def forward(self, ws, cs, ls, dep_ts=None):
    batchsize = len(ws)
    xp = chainer.cuda.get_array_module(ws[0])
    ws = map(self.emb_word, ws)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (int(l[0]), 1)))
        for c, l in zip(cs, ls)]
    xs_f = [F.dropout(F.concat([w, c]),
        self.dropout_ratio, train=self.train) for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]
    dep_ys = [self.biaffine_arc(
        F.elu(F.dropout(self.arc_dep(h), 0.32, train=self.train)),
        F.elu(F.dropout(self.arc_head(h), 0.32, train=self.train))) for h in hs]
    if dep_ts is not None:
        heads = dep_ts
    else:
        heads = [F.argmax(y, axis=1) for y in dep_ys]
    cat_ys = [
        self.biaffine_tag(
            F.elu(F.dropout(self.rel_dep(h), 0.32, train=self.train)),
            F.elu(F.dropout(self.rel_head(
                F.embed_id(t, h, ignore_label=IGNORE)), 0.32, train=self.train))) \
        for h, t in zip(hs, heads)]
    return cat_ys, dep_ys
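The F.embed_id(t, h, ...) call above uses the embedding lookup as a row gather: each head index in t selects the corresponding hidden vector from h. A minimal self-contained illustration with dummy data:

import numpy as np
import chainer.functions as F

h = np.arange(12, dtype=np.float32).reshape(4, 3)  # (sentence length, hidden)
t = np.array([2, 0, 3, 1], dtype=np.int32)         # a head index for each token
heads = F.embed_id(t, h)                           # picks row t[i] of h for each i
print(heads.data)  # rows 2, 0, 3, 1 of h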
def forward(self, ws, cs, ls):
    """
    xs [(w, c, l), ...]
    w: word, c: char, l: length
    """
    batchsize = len(ws)
    # cs: [(sentence length, max word length)]
    ws = map(self.emb_word, ws)
    # ls: [(sentence length, char dim)]
    # before conv: (sent len, 1, max word len, char_size)
    # after conv: (sent len, char_size, max word len, 1)
    # after max_pool: (sent len, char_size, 1, 1)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
        for c, l in zip(cs, ls)]
    # [(sentence length, (word_dim + char_dim))]
    xs_f = [F.dropout(F.concat([w, c]),
        self.dropout_ratio, train=self.train) for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of categories)]
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]
    cat_ys = [self.linear_cat2(F.relu(self.linear_cat1(h))) for h in hs]
    dep_ys = [self.biaffine(
        F.relu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
        F.relu(F.dropout(self.linear_head(h), 0.32, train=self.train))) for h in hs]
    return cat_ys, dep_ys
def predict(self, xs):
    """
    batch: list of split sentences
    """
    batchsize = len(xs)
    fs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(fs, padding=-1)
    ys = self.forward(ws, ss, ps)
    ys = F.transpose(F.stack(ys, 2), (0, 2, 1))
    return [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in \
            zip(xs, F.split_axis(ys, batchsize, 0))]
def __call__(self, x):
    return functions.squeeze(x, self.axis)
def __call__(self, x, split_into_variables=True, discard_context=False):
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    # conv
    out_data = self.conv_blocks(x)
    out_data = functions.reshape(out_data, (batchsize, -1, seq_length))
    # rnn
    for index, blocks in enumerate(self.rnn_blocks.blocks):
        sru = blocks[0]
        dropout = blocks[1] if len(blocks) == 2 else None
        hidden, cell, context = sru(out_data, self.contexts[index])
        if discard_context is False:
            self.contexts[index] = context
        if dropout is not None:
            out_data = dropout(out_data)
    # fc
    out_data = self.dense_blocks(out_data)
    assert out_data.shape[2] == seq_length
    # When computing CTC loss, split the RNN output into a list of per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.squeeze(out_data, axis=2)
    return out_data
def __call__(self, x):
    return functions.squeeze(x, self.axis)
def __call__(self, S, h):
    return F.squeeze(F.softmax(F.batch_matmul(S, h)), axis=2)
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    S = self.inner_weight(F.reshape(S, (batch_size * src_len, hidden_size)))
    S = F.reshape(S, (batch_size, src_len, hidden_size))
    a = F.softmax(F.squeeze(F.batch_matmul(S, h), axis=2))
    return a
# MLP layer, as of Bahdanau+ 15
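The comment above introduces an MLP (Bahdanau-style additive) attention scorer whose body is not part of this excerpt; as a rough, generic sketch of that idea only (the class and link names here are assumptions, not the original code):

import chainer
import chainer.functions as F
import chainer.links as L

class MLPAttention(chainer.Chain):
    # Generic additive attention: score_j = v^T tanh(W_src s_j + W_dec h)
    def __init__(self, hidden_size):
        super(MLPAttention, self).__init__(
            W_src=L.Linear(hidden_size, hidden_size, nobias=True),
            W_dec=L.Linear(hidden_size, hidden_size),
            v=L.Linear(hidden_size, 1, nobias=True),
        )

    def __call__(self, S, h):
        batch_size, src_len, hidden_size = S.data.shape
        s = self.W_src(F.reshape(S, (batch_size * src_len, hidden_size)))
        s = F.reshape(s, (batch_size, src_len, hidden_size))
        d = F.broadcast_to(
            F.reshape(self.W_dec(h), (batch_size, 1, hidden_size)), s.shape)
        e = self.v(F.reshape(F.tanh(s + d), (batch_size * src_len, hidden_size)))
        return F.softmax(F.reshape(e, (batch_size, src_len)))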
embedding_conv2d.py — project: Multitask-and-Transfer-Learning, author: AI-ON
def __call__(self, id, x):
    W = self.W_embedding(id)
    b = F.squeeze(self.b_embedding(id))
    # Reshape the vector to be the right dimensions for 2D conv
    W = F.reshape(W, (self.out_channels, self.in_channels, self.kh, self.kw))
    return F.convolution_2d(x, W, b, self.stride, self.pad)
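The layer above builds its convolution weights and bias from an embedding lookup on id; a small self-contained sketch of the same dynamic-filter idea (all sizes below are illustrative assumptions):

import numpy as np
import chainer.functions as F

out_channels, in_channels, kh, kw = 8, 3, 3, 3
# Pretend W_embedding / b_embedding returned these flat vectors for some id:
W_flat = np.random.randn(1, out_channels * in_channels * kh * kw).astype(np.float32)
b = np.random.randn(1, out_channels).astype(np.float32)
x = np.random.randn(1, in_channels, 32, 32).astype(np.float32)
W = F.reshape(W_flat, (out_channels, in_channels, kh, kw))
y = F.convolution_2d(x, W, F.squeeze(b), stride=1, pad=1)
print(y.shape)  # (1, 8, 32, 32)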
def __call__(self, xs):
    """
    xs [(w, c, l, y), ...]
    w: word, c: char, l: length, y: label
    """
    batchsize = len(xs)
    ws, cs, ls, ts = zip(*xs)
    # cs: [(sentence length, max word length)]
    ws = map(self.emb_word, ws)
    # ls: [(sentence length, char dim)]
    # cs = map(lambda (c, l): F.sum(self.emb_char(c), 1) / l, zip(cs, ls))
    # cs = [F.reshape(F.average_pooling_2d(
    #         F.expand_dims(self.emb_char(c), 0), (l, 1)), (-1, self.char_dim))
    #       for c, l in zip(cs, ls)]
    # before conv: (sent len, 1, max word len, char_size)
    # after conv: (sent len, char_size, max word len, 1)
    # after max_pool: (sent len, char_size, 1, 1)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
        for c, l in zip(cs, ls)]
    # [(sentence length, (word_dim + char_dim))]
    xs_f = [F.dropout(F.concat([w, c]),
        self.dropout_ratio, train=self.train) for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of categories)]
    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, hs_b)]
    # ys = [self.linear2(F.relu(
    #         self.linear1(
    #             F.squeeze(
    #                 F.transpose(
    #                     F.relu(self.conv1(
    #                         F.reshape(
    #                             F.concat([h_f, h_b]),
    #                             (1, 1, -1, 2 * self.hidden_dim))), (0, 3, 2, 1))
    #                 )))))
    #       for h_f, h_b in zip(hs_f, hs_b)]
    loss = reduce(lambda x, y: x + y,
        [F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)])
    acc = reduce(lambda x, y: x + y,
        [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)])
    acc /= batchsize
    chainer.report({
        "loss": loss,
        "accuracy": acc
    }, self)
    return loss