def __call__(self, ht, xs, d_bar_s_1):
    # ht: encoder hidden states, shape (batch_size, n_words, in_size)
    # xs: input sequences
    if d_bar_s_1 is None:
        d_bar_s_1 = np.zeros(self.in_size)

    ht_T = list(map(F.transpose, ht))
    phi_ht = list(map(W1, ht_T))

    d_s = rnn(d_bar_s_1, y_s_1)

    phi_d = F.transpose_sequence(W2(F.transpose_sequence(d_s)))
    u_st = list(map(lambda x: phi_d * x, phi_ht))  # (4)

    sum_u = F.sum(u_st)
    alpha_st = list(map(lambda x: x / sum_u, u_st))  # (3)
    z_s = F.argmax(alpha_st, axis=0)

    c_s = F.sum(list(map(lambda x, y: x * y, alpha_st, ht)))  # (2)

    d_bar_s = F.relu(W3(F.concat([c_s, d_s])))
    return d_bar_s, d_s, c_s, z_s
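The numbered comments ((2)-(4)) appear to reference attention equations from the snippet's source paper, which is not shown here. A plausible reconstruction, with \phi_h and \phi_d standing for the learned W1/W2 projections, is:

    u_s^t = \phi_d(d_s)^\top \phi_h(h_t)              (4)
    \alpha_s^t = u_s^t / \sum_{t'} u_s^{t'}           (3)
    c_s = \sum_t \alpha_s^t h_t                       (2)

Note that the code divides the raw scores by their sum rather than exponentiating first, unlike a standard softmax.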
Python examples of transpose_sequence()
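All snippets on this page revolve around chainer.functions.transpose_sequence, which turns a list of per-sequence arrays (sorted by descending length) into a list of per-time-step arrays. A minimal sketch of its behavior:

import numpy as np
import chainer.functions as F

# Three sequences, lengths in descending order as the function requires.
xs = [np.array([1, 2, 3], np.float32),
      np.array([4, 5], np.float32),
      np.array([6], np.float32)]
ys = F.transpose_sequence(xs)
# ys[0].data -> [1., 4., 6.]  (first element of every sequence)
# ys[1].data -> [2., 5.]      (second element of the two longer ones)
# ys[2].data -> [3.]          (third element of the longest only)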
def __call__(self, x, hs):
    batch, dim = x.shape
    alphas = 0
    _sum = 0
    for h in F.transpose_sequence(hs[:batch]):
        size = h.shape[0]
        if size < batch:
            # Zero-pad the time step so it matches the full batch size
            # when sequence lengths differ within the minibatch.
            h = F.vstack([h, variable.Variable(
                self.xp.zeros((batch - size, h.shape[1]), dtype='f'))])
        score = self._score_func(x, h)
        e = F.exp(score)
        _sum += e
        alphas += batch_matmul(h, e)
    # Normalize the accumulated weighted sum to obtain the context vector.
    c = F.reshape(batch_matmul(F.reshape(alphas, (batch, dim)),
                               (1 / _sum)), (batch, dim))
    return c
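For intuition, the incremental exp/sum above is just a softmax computed step by step. A minimal sketch of the equivalent fixed-length computation, assuming a plain dot-product score in place of self._score_func (fixed_length_attention is a hypothetical name, not from the snippet):

import chainer.functions as F

def fixed_length_attention(x, hs_stacked):
    # x: (batch, dim) query; hs_stacked: (batch, T, dim) encoder states.
    batch, T, dim = hs_stacked.shape
    scores = F.batch_matmul(hs_stacked, x)              # (batch, T, 1)
    alphas = F.softmax(F.reshape(scores, (batch, T)))   # attention weights
    c = F.batch_matmul(F.reshape(alphas, (batch, 1, T)), hs_stacked)
    return F.reshape(c, (batch, dim))                   # context vector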
def __call__(self, xs, ys):
    # xs and ys have identical lengths, so sorting each list by
    # descending length independently yields the same permutation.
    xs = permutate_list(xs, argsort_list_descent(xs), inv=False)
    xs = F.transpose_sequence(xs)
    ys = permutate_list(ys, argsort_list_descent(ys), inv=False)
    ys = F.transpose_sequence(ys)
    return super(CRF, self).__call__(xs, ys)
def argmax(self, xs):
    xs = permutate_list(xs, argsort_list_descent(xs), inv=False)
    xs = F.transpose_sequence(xs)
    score, path = super(CRF, self).argmax(xs)
    path = F.transpose_sequence(path)
    # Note: unlike the variant further below, the permutation is not
    # inverted here, so results come back in length-sorted order.
    return score, path
def __call__(self, xs, ys, reduce='mean'):
    indices = argsort_list_descent(xs)
    xs = permutate_list(xs, indices, inv=False)
    xs = F.transpose_sequence(xs)
    ys = permutate_list(ys, indices, inv=False)
    ys = F.transpose_sequence(ys)
    return F.crf1d(self.cost, xs, ys, reduce)
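A minimal usage sketch of the same sort-then-transpose pattern with Chainer's built-in CRF link (toy data, all values assumed for illustration):

import numpy as np
import chainer.links as L
import chainer.functions as F

crf = L.CRF1d(n_label=3)
# Two score sequences of shape (length, n_label), lengths in descending order.
xs = [np.random.randn(4, 3).astype(np.float32),
      np.random.randn(2, 3).astype(np.float32)]
ys = [np.array([0, 1, 2, 1], np.int32),
      np.array([2, 0], np.int32)]
loss = crf(F.transpose_sequence(xs), F.transpose_sequence(ys))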
def convolution(self, xs, train):
    # Width-2 causal convolution: each output mixes the current input
    # with the previous time step (a zero vector pads position 0).
    pad = Variable(self.xp.zeros((1, self.in_size), dtype=self.xp.float32),
                   volatile=not train)
    xs_prev = [F.concat([pad, x[:-1, :]], axis=0) for x in xs]
    conv_output = [self.W(x1) + self.V(x2) for x1, x2 in zip(xs_prev, xs)]
    return F.transpose_sequence(conv_output)
def pooling(self, c, xs, train):
    """Implement fo-pooling.

    (Seemingly the best option when compared to ifo-/f-pooling.)
    """
    c_prev = c
    hs = []
    for x in xs:
        batch = x.shape[0]
        w0, w1, w2 = F.split_axis(x, 3, axis=1)
        z = F.tanh(w0)
        f = F.sigmoid(w1)
        o = F.sigmoid(w2)
        c_prev_rest = None
        if c_prev is None:
            c = (1 - f) * z
        else:
            # When sequence lengths differ within the minibatch, set the
            # states of already-finished sequences aside.
            if c_prev.shape[0] > batch:
                c_prev, c_prev_rest = F.split_axis(c_prev, [batch], axis=0)
            # if train:
            #     zoneout_mask = (0.1 < self.xp.random.rand(*f.shape))
            #     c = f * c_prev + (1 - f) * z * zoneout_mask
            # else:
            #     c = f * c_prev + (1 - f) * z
            c = f * c_prev + (1 - f) * z
        h = o * c
        if c_prev_rest is not None:
            c = F.concat([c, c_prev_rest], axis=0)
        hs.append(h)
        c_prev = c
    return c, F.transpose_sequence(hs)
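The loop matches the fo-pooling recurrence of the QRNN (Bradbury et al., 2016). With the gates z_t, f_t, o_t produced by the convolution step (the three-way split_axis above):

    c_t = f_t \odot c_{t-1} + (1 - f_t) \odot z_t
    h_t = o_t \odot c_t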
def translate(self, xs, max_length=100):
    print("Now translating")
    batch = len(xs)
    print("batch:", batch)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        wxs = [np.array([source_word_ids.get(w, UNK) for w in x],
                        dtype=np.int32) for x in xs]
        wx_len = [len(wx) for wx in wxs]
        wx_section = np.cumsum(wx_len[:-1])
        valid_wx_section = np.insert(wx_section, 0, 0)
        cxs = [np.array([source_char_ids.get(c, UNK) for c in list("".join(x))],
                        dtype=np.int32) for x in xs]

        # Word- and character-level embeddings, plus reversed copies for
        # the backward encoders.
        wexs = sequence_embed(self.embed_xw, wxs)
        cexs = sequence_embed(self.embed_xc, cxs)
        wexs_f = wexs
        wexs_b = [wex[::-1] for wex in wexs]
        cexs_f = cexs
        cexs_b = [cex[::-1] for cex in cexs]

        _, hfw = self.encoder_fw(None, wexs_f)
        _, hbw = self.encoder_bw(None, wexs_b)
        _, hfc = self.encoder_fc(None, cexs_f)
        _, hbc = self.encoder_bc(None, cexs_b)

        # Re-reverse the backward states, concatenate them with the forward
        # ones, then join word- and character-level representations.
        hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
        hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]
        htw = list(map(lambda x, y: F.concat([x, y], axis=1), hfw, hbw))
        htc = list(map(lambda x, y: F.concat([x, y], axis=1), hfc, hbc))
        ht = list(map(lambda x, y: F.concat([x, y], axis=0), htw, htc))

        ys = self.xp.full(batch, EOS, 'i')
        result = []
        h = None
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(h, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)
            h = F.transpose_sequence(h_list)[-1]
            h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags.
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
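This translate function (and the variant below) relies on a sequence_embed helper that is not shown on this page. A common definition, taken from the official Chainer seq2seq example and assumed here, is:

import numpy as np
import chainer.functions as F

def sequence_embed(embed, xs):
    # Embed all sequences in a single call, then split back per sequence.
    x_len = [len(x) for x in xs]
    x_section = np.cumsum(x_len[:-1])
    ex = embed(F.concat(xs, axis=0))
    exs = F.split_axis(ex, x_section, 0)
    return exs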
def translate(self, xs, max_length=100):
    print("Now translating")
    batch = len(xs)
    print("batch:", batch)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        wxs = [np.array([source_word_ids.get(w, UNK) for w in x],
                        dtype=np.int32) for x in xs]
        wx_len = [len(wx) for wx in wxs]
        wx_section = np.cumsum(wx_len[:-1])
        valid_wx_section = np.insert(wx_section, 0, 0)
        cxs = [np.array([source_char_ids.get(c, UNK) for c in list("".join(x))],
                        dtype=np.int32) for x in xs]

        wexs = sequence_embed(self.embed_xw, wxs)
        cexs = sequence_embed(self.embed_xc, cxs)
        wexs_f = wexs
        wexs_b = [wex[::-1] for wex in wexs]
        cexs_f = cexs
        cexs_b = [cex[::-1] for cex in cexs]

        _, hfw = self.encoder_fw(None, wexs_f)
        h1, hbw = self.encoder_bw(None, wexs_b)
        _, hfc = self.encoder_fc(None, cexs_f)
        h2, hbc = self.encoder_bc(None, cexs_b)

        hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
        hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]
        htw = list(map(lambda x, y: F.concat([x, y], axis=1), hfw, hbw))
        htc = list(map(lambda x, y: F.concat([x, y], axis=1), hfc, hbc))
        ht = list(map(lambda x, y: F.concat([x, y], axis=0), htw, htc))

        ys = self.xp.full(batch, EOS, 'i')
        result = []
        # Unlike the variant above, the decoder state is initialized from
        # the backward encoders' final hidden states.
        h = F.concat([h1, h2], axis=2)
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(h, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)
            h = F.transpose_sequence(h_list)[-1]
            h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags.
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def predict(self, y_list, t, compute_loss=True):
    predict_list = []
    cnt = 0
    # Regroup the flat list of per-token outputs into per-sentence chunks.
    for n_len in self.n_length:
        pred = F.concat(y_list[cnt:cnt + n_len], axis=0)
        predict_list.append(pred)
        cnt += n_len

    inds = self.inds
    # inds_trans = [inds[i] for i in inds]
    inds_rev = sorted([(i, ind) for i, ind in enumerate(inds)],
                      key=lambda x: x[1])
    hs = [predict_list[i] for i in inds]

    ts_original = None
    if compute_loss:
        ts_original = [self.xp.array(t[i], self.xp.int32) for i in inds]

    hs = F.transpose_sequence(hs)
    loss = None
    if compute_loss and ts_original is not None:
        # Loss
        ts = F.transpose_sequence(ts_original)
        loss = self.lossfun(hs, ts)

    # Predict
    score, predicts_trans = self.lossfun.argmax(hs)
    predicts = F.transpose_sequence(predicts_trans)

    gold_predict_pairs = []
    if compute_loss:
        for pred, gold in zip(predicts, ts_original):
            pred = to_cpu(pred.data)
            gold = to_cpu(gold)
            gold_predict_pairs.append([gold, pred])
    else:
        for pred in predicts:
            pred = to_cpu(pred.data)
            gold_predict_pairs.append([pred])

    # Restore the original sentence order.
    gold_predict_pairs = [gold_predict_pairs[e_i] for e_i, _ in inds_rev]
    self.y = gold_predict_pairs
    return gold_predict_pairs, loss
def argmax(self, xs):
    indices = argsort_list_descent(xs)
    xs = permutate_list(xs, indices, inv=False)
    xs = F.transpose_sequence(xs)
    score, path = F.argmax_crf1d(self.cost, xs)
    path = F.transpose_sequence(path)
    # Invert the permutation so results follow the original input order.
    path = permutate_list(path, indices, inv=True)
    score = F.permutate(score, indices, inv=True)
    return score, path
# def argnmax(self, xs, n=10):
#     cost = cuda.to_cpu(self.cost.data)
#     xs = permutate_list(xs, argsort_list_descent(xs), inv=False)
#     xs = [cuda.to_cpu(x.data) for x in xs]
#
#     scores = []
#     paths = []
#
#     for _xs in xs:
#         alphas = [_xs[0]]
#         for x in _xs[1:]:
#             alpha = np.max(alphas[-1] + cost, axis=1) + x
#             alphas.append(alpha)
#
#         _scores = []
#         _paths = []
#         _end = len(_xs) - 1
#         buf = n
#
#         c = queue.PriorityQueue()
#         q = queue.PriorityQueue()
#         x = _xs[_end]
#         for i in range(x.shape[0]):
#             q.put((-alphas[_end][i], -x[i], _end,
#                    np.random.random(), np.array([i], np.int32)))
#         while not q.empty() and c.qsize() < n + buf:
#             beta, score, time, r, path = q.get()
#             if time == 0:
#                 c.put((score, r, path))
#                 continue
#             t = time - 1
#             x = _xs[t]
#             for i in range(x.shape[0]):
#                 _trans = score - cost[i, path[-1]]
#                 _beta = -alphas[t][i] + _trans
#                 _score = _trans - x[i]
#                 q.put((_beta, _score, t,
#                        np.random.random(), np.append(path, i)))
#         while not c.empty() and len(_paths) < n:
#             score, r, path = c.get()
#             _scores.append(-score)
#             _paths.append(path[::-1])
#         scores.append(_scores)
#         paths.append(_paths)
#
#     return scores, paths