def predict(self, xs):
    """Score a batch of already-tokenized sentences.

    Args:
        xs: list of sentences, each one a sequence of tokens.

    Returns:
        The result of ``zip`` over two per-sentence lists: the log-softmax
        category scores and the log-softmax dependency scores, both as raw
        ``.data`` arrays.
    """
    n_sents = len(xs)
    # Only the first two items produced by the extractor are used here.
    feats = [self.extractor.process(sent)[:2] for sent in xs]
    ws, cs = concat_examples(feats, padding=IGNORE)
    cat_out, dep_out = self.forward(ws, cs)
    cat_out = F.transpose(F.stack(cat_out, 2), (0, 2, 1))

    cat_ys = []
    for sent, y in zip(xs, F.split_axis(cat_out, n_sents, 0)):
        flat = F.reshape(y, (y.shape[1], -1))
        # Keep rows 1 .. len(sent); row 0 and any trailing rows are dropped.
        cat_ys.append(F.log_softmax(flat[1:len(sent) + 1]).data)

    dep_ys = []
    for sent, y in zip(xs, dep_out):
        n = len(sent)
        # Rows 1 .. n, columns 0 .. n.
        dep_ys.append(F.log_softmax(y[1:n + 1, :n + 1]).data)

    assert len(cat_ys) == len(dep_ys)
    return zip(cat_ys, dep_ys)
python类split_axis()的实例源码
def __call__(self, ws, ss, ps, ts):
    """Compute the summed cross-entropy loss and report loss and accuracy.

    Args:
        ws, ss, ps: inputs passed unchanged to ``self.forward`` (the
            original note calls them word, suffix and prefix).
        ts: 2-D label array; it is transposed and split into one target
            vector per position along its second axis.

    Returns:
        Sum over positions of ``F.softmax_cross_entropy``.
    """
    _, length = ts.shape
    # The first and the last output of forward() are not scored.
    ys = self.forward(ws, ss, ps)[1:-1]
    targets = [F.squeeze(col, 0)
               for col in F.split_axis(F.transpose(ts), length, 0)]

    def add(a, b):
        return a + b

    losses = [F.softmax_cross_entropy(y, t) for y, t in zip(ys, targets)]
    loss = reduce(add, losses)
    accuracies = [F.accuracy(y, t, ignore_label=IGNORE)
                  for y, t in zip(ys, targets)]
    acc = reduce(add, accuracies)
    # Summed accuracy is normalised by the number of label positions.
    acc /= length

    chainer.report({"loss": loss, "accuracy": acc}, self)
    return loss
def forward(self, ws, ss, ps):
    """Run the stacked forward/backward LSTMs over a padded batch.

    Args:
        ws: 2-D array of word ids, shape ``(batchsize, length)``.
        ss: suffix ids; their embeddings are reshaped to
            ``(batchsize, length, -1)``.
        ps: prefix ids, handled the same way as ``ss``.

    Returns:
        list: one ``self.linear2`` output per position, in sentence order.
    """
    batchsize, length = ws.shape
    ws = self.emb_word(ws)  # (batch, length, word_dim)
    ss = F.reshape(self.emb_suf(ss), (batchsize, length, -1))
    ps = F.reshape(self.emb_prf(ps), (batchsize, length, -1))
    # Put the position axis first so the batch can be cut into time steps.
    hs = F.transpose(F.concat([ws, ss, ps], 2), (1, 0, 2))
    hs = F.dropout(hs, self.dropout_ratio, train=self.train)
    hs = F.split_axis(hs, length, 0)
    if not isinstance(hs, (tuple, list)):
        # Chainer releases that still accept ``train=`` return a bare
        # Variable when there is only one section; iterating it would walk
        # over rows instead of time steps.
        hs = (hs,)

    hs_f = []
    hs_b = []
    self._init_state()
    # Feed the forward stack left-to-right and the backward stack
    # right-to-left in the same loop.
    for h_in_f, h_in_b in zip(hs, reversed(hs)):
        h_f = self.lstm_f2(self.lstm_f1(F.squeeze(h_in_f, 0)))
        hs_f.append(h_f)
        h_b = self.lstm_b2(self.lstm_b1(F.squeeze(h_in_b, 0)))
        hs_b.append(h_b)

    # Backward states were collected in reverse order; flip them so both
    # directions line up per position.
    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, reversed(hs_b))]
    return ys
def predict(self, xs):
    """Score a batch of already-tokenized sentences.

    Args:
        xs: list of sentences, each one a sequence of tokens.

    Returns:
        tuple: ``(cat_ys, dep_ys)``, two lists with one raw ``.data`` array
        per sentence. Category scores are returned as produced by the
        network; dependency scores go through ``F.log_softmax``.
    """
    batchsize = len(xs)
    # Keep the raw sentences in ``xs``: the slices below need the sentence
    # length, not the length of the extracted feature tuple.
    fs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(fs, padding=IGNORE)
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
    dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))
    cat_ys = [F.squeeze(y, 0).data[1:len(x) + 1]
              for x, y in zip(xs, F.split_axis(cat_ys, batchsize, 0))]
    # Drop the size-1 batch axis before slicing; slicing first would index
    # that axis with ``1:`` and leave nothing to squeeze.
    # NOTE(review): the row/column ranges are kept as written
    # (rows 1 .. len(x), all columns but the last) - confirm intent.
    dep_ys = [F.log_softmax(F.squeeze(y, 0)[1:len(x) + 1, :-1]).data
              for x, y in zip(xs, F.split_axis(dep_ys, batchsize, 0))]
    return cat_ys, dep_ys
def __call__(self, x, split_into_variables=True):
    """Apply the parent model and arrange its output along the sequence axis.

    Args:
        x: 4-D input; axis 0 is read as the batch size and axis 3 as the
            sequence length.
        split_into_variables: when true, return the output cut into
            ``seq_length`` pieces along axis 1 of the flattened result;
            otherwise return a single array with axis 2 squeezed out.

    Returns:
        The ``F.split_axis`` result, or one squeezed Variable.
    """
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    out_data = super(AcousticModel, self).__call__(x)
    assert out_data.shape[3] == seq_length
    # Both layouts start by exchanging axes 1 and 3.
    swapped = F.swapaxes(out_data, 1, 3)
    if not split_into_variables:
        return F.squeeze(swapped, axis=2)
    # The original comment here was mis-encoded; it mentions CTC, RNN and
    # Variable. The code flattens per sample and splits into seq_length
    # pieces.
    flat = F.reshape(swapped, (batchsize, -1))
    return F.split_axis(flat, seq_length, axis=1)
def __call__(self, x, split_into_variables=True):
    """Apply the parent model and arrange its output along the sequence axis.

    Args:
        x: 4-D input; axis 0 is read as the batch size and axis 3 as the
            sequence length.
        split_into_variables: when true, return the output cut into
            ``seq_length`` pieces along axis 1 of the flattened result;
            otherwise return a single array with axis 2 squeezed out.

    Returns:
        The ``F.split_axis`` result, or one squeezed Variable.
    """
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    out_data = super(AcousticModel, self).__call__(x)
    assert out_data.shape[3] == seq_length
    # Both layouts start by exchanging axes 1 and 3.
    swapped = F.swapaxes(out_data, 1, 3)
    if not split_into_variables:
        return F.squeeze(swapped, axis=2)
    # The original comment here was mis-encoded; it mentions CTC, RNN and
    # Variable. The code flattens per sample and splits into seq_length
    # pieces.
    flat = F.reshape(swapped, (batchsize, -1))
    return F.split_axis(flat, seq_length, axis=1)
def __call__(self, X, ht_enc):
    """Combine ``self.W(X)`` with a projected encoder state and pool.

    Args:
        X: input passed to ``self.W``.
        ht_enc: encoder state passed to ``self.V``.

    Returns:
        Whatever ``self.pool`` returns for the ``self.num_split`` pieces of
        ``W(X) + V(ht_enc)`` taken along axis 1.
    """
    right_pad = self._kernel_size - 1
    conv_out = self.W(X)
    if right_pad > 0:
        # Trim the trailing ``right_pad`` positions of the last axis.
        conv_out = conv_out[..., :-right_pad]
    enc_proj = functions.expand_dims(self.V(ht_enc), axis=2)
    # Repeat the encoder projection along the new last axis so it can be
    # added element-wise, e.g. Vh = [[11, 12, 13]] becomes
    #     [[[11 11 11]
    #       [12 12 12]
    #       [13 13 13]]]
    enc_proj, conv_out = functions.broadcast(enc_proj, conv_out)
    pieces = functions.split_axis(conv_out + enc_proj, self.num_split, axis=1)
    return self.pool(pieces)
def _setup_slice(self, layer):
    """Register a ``split_axis`` forward function for a slice layer.

    The axis comes from ``slice_param.axis`` if set, else from
    ``slice_param.slice_dim`` if set, else it is 1. The split is made at
    ``slice_param.slice_point`` when that is non-empty; otherwise the input
    is cut into as many sections as the layer has tops.
    """
    param = layer.slice_param
    axis = 1  # used when neither field is present
    for field in ('axis', 'slice_dim'):
        if param.HasField(field):
            axis = getattr(param, field)
            break

    points = param.slice_point
    indices_or_sections = list(points) if points else len(list(layer.top))

    self.forwards[layer.name] = _SingleArgumentFunction(
        functions.split_axis,
        indices_or_sections=indices_or_sections,
        axis=axis
    )
    self._add_layer(layer)
def initialize_entities(self, entities, max_entnum, train=True):
    """Assign ids to entities and look up one embedding per id.

    Args:
        entities: sequence of entity ids.
        max_entnum: exclusive upper bound for the random ids drawn when
            ``train`` is true.
        train: when true each entity gets a random id in
            ``[0, max_entnum)``; otherwise the entity ids are used as-is.

    Returns:
        tuple: ``(old2newD, e2sD)`` - a mapping from original entity to its
        (int) new id, and a mapping from new id to its embedding slice.
    """
    if train:
        news = self.xp.random.randint(0, max_entnum, len(entities))
    else:
        news = entities

    pairs = list(zip(news, entities))
    old2newD = {entity: int(new) for new, entity in pairs}
    new_e_L = [new for new, _ in pairs]

    ids = chainer.Variable(
        self.xp.array(new_e_L, dtype=np.int32), volatile=not train)
    es_L = F.split_axis(self.embed(ids), len(new_e_L), axis=0)
    if len(new_e_L) <= 1:
        # Presumably split_axis returns a single object rather than a
        # sequence in this case - TODO confirm for the Chainer version used.
        es_L = [es_L]

    e2sD = dict(zip(new_e_L, es_L))
    return old2newD, e2sD
def __call__(self, chars):
    """Encode character sequences with a convolution and max-over-time pooling.

    Args:
        chars: one character sequence, or a tuple/list of them.

    Returns:
        The maximum over axis 1 of the padded, odd-indexed segments of the
        convolution output.
    """
    if not isinstance(chars, (tuple, list)):
        chars = [chars]
    char_ids, boundaries = self._create_sequence(chars)
    embedded = F.dropout(self.embed(self.xp.array(char_ids)), self._dropout)
    length, dim = embedded.shape
    conv_out = self.conv(F.reshape(embedded, (1, 1, length, dim)))
    # conv_out.shape -> (1, out_size, length, 1)
    per_position = F.transpose(F.reshape(conv_out, (self.out_size, length)))
    segments = F.split_axis(per_position, boundaries, axis=0)
    # Only the odd-numbered segments are kept; -inf padding cannot win the max.
    kept = list(segments)[1::2]
    ys = F.max(F.pad_sequence(kept, padding=-np.inf), axis=1)  # max over time pooling
    return ys
def calc_log_posterior(theta, x, n=None):
    """Calculate the unnormalized log posterior, ``log p(theta | x) + C``.

    Args:
        theta(chainer.Variable): model parameters; split into two halves
            along axis 0.
        x(numpy.ndarray): sample data
        n(int): total data size; when given, the log likelihood is scaled
            by ``n / len(x)``.

    Returns:
        chainer.Variable: Variable holding the unnormalized log posterior,
        ``log p(theta | x) + C`` of shape ``()``
    """
    theta1, theta2 = F.split_axis(theta, 2, 0)

    def log_prior(param, var):
        # Log of a zero-mean Gaussian likelihood, summed over all elements.
        return F.sum(F.log(gaussian.gaussian_likelihood(param, 0, var)))

    prior1 = log_prior(theta1, VAR1)
    prior2 = log_prior(theta2, VAR2)

    # Equal-weight mixture of two Gaussians centred at theta1 and
    # theta1 + theta2.
    first = gaussian.gaussian_likelihood(x, theta1, VAR_X)
    second = gaussian.gaussian_likelihood(x, theta1 + theta2, VAR_X)
    log_likelihood = F.sum(F.log(first / 2 + second / 2))
    if n is not None:
        log_likelihood *= n / len(x)

    return prior1 + prior2 + log_likelihood
def __call__(self, X):
    """Apply ``self.W`` and gate one half of the output with the other.

    The output of ``self.W`` is trimmed on the right by
    ``kernel_size - 1`` positions, split in two along axis 1, and combined
    as ``A * sigmoid(B)``. The result is also stored in ``self.H``.

    Args:
        X: input passed to ``self.W``.

    Returns:
        The gated output ``A * sigmoid(B)``.
    """
    # remove right paddings
    # e.g.
    # kernel_size = 3
    # pad = 2
    # input sequence with paddings:
    # [0, 0, x1, x2, x3, 0, 0]
    # |< t1 >|
    #    |< t2 >|
    #       |< t3 >|
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        # With kernel_size == 1 there is nothing to trim, and ``[:, :, :-0]``
        # would select an empty slice instead of the whole axis.
        WX = WX[:, :, :-pad]
    A, B = functions.split_axis(WX, 2, axis=1)
    self.H = A * functions.sigmoid(B)
    return self.H
def __call__(self, x):
    """Advance the recurrent state with input ``x`` and return the new output.

    ``self.pre(x)`` is split into three gates along the last axis. The cell
    state ``self.c`` and output ``self.h`` are updated in place on the
    object, and ``x`` is remembered in ``self.x``.
    """
    if getattr(self, 'encoding', None) is None:
        # No encoding available yet: record the batch size and initialise.
        self.batch_size = x.shape[0]
        self.init()

    dims = len(x.shape) - 1
    forget, cand, out = F.split_axis(self.pre(x), 3, axis=dims)
    forget = F.sigmoid(forget)
    cand = (1 - forget) * F.tanh(cand)
    out = F.sigmoid(out)

    if dims == 2:
        # 3-D input is handed to strnn together with the current state.
        self.c = strnn(forget, cand, self.c[:self.batch_size])
    else:
        self.c = forget * self.c + cand

    if not self.attention:
        self.h = self.c * out
    else:
        context = attention_sum(self.encoding, self.c)
        self.h = out * self.o(F.concat((self.c, context), axis=dims))

    self.x = x
    return self.h
def __call__(self, x, margin_factor=1.0, train=True):
    """Embed a triplet batch and return its loss.

    ``x`` is a batch of size 3n laid out as three consecutive groups:

        | anchor_1   |
        | [...]      |
        | anchor_n   |
        | positive_1 |
        | [...]      |
        | positive_n |
        | negative_1 |
        | [...]      |
        | negative_n |

    Args:
        x: the stacked batch described above.
        margin_factor: passed to ``compute_loss`` while training.
        train: when false the margin factor is fixed to 1.0.

    Returns:
        The value of ``self.compute_loss``.
    """
    anchors, positives, negatives = F.split_axis(x, 3, 0)
    anc = self.embed(anchors)
    pos = self.embed(positives)
    neg = self.embed(negatives)
    dist_pos, dist_neg = self.squared_distance(anc, pos, neg)
    if train:
        mf = margin_factor
    else:
        mf = 1.0  # the caller's margin factor is ignored when testing
    return self.compute_loss(dist_pos, dist_neg, mf)
lstm_encoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def __call__(self, s, xs):
    """Calculate all hidden states and cell states.

    Args:
        s (~chainer.Variable or None): Initial (hidden & cell) states. If
            ``None`` is specified, ``None`` is passed on to the LSTM for
            both states.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Return:
        (hy, cy): a pair of hidden and cell states at the end of the sequence,
        ys: a hidden state sequence at the last layer
    """
    if len(xs) > 1:
        # Embed every sequence in one call, then cut the result back at the
        # cumulative lengths of all but the last sequence.
        lengths = np.array([len(x) for x in xs[:-1]], dtype=np.int32)
        sections = np.cumsum(lengths)
        embedded = self.embed(F.concat(xs, axis=0))
        xs = F.split_axis(embedded, sections, axis=0)
    else:
        xs = [self.embed(xs[0])]

    h0, c0 = (None, None) if s is None else (s[0], s[1])
    hy, cy, ys = self.lstm(h0, c0, xs)
    return (hy, cy), ys
lstm_encoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def __call__(self, s, xs):
    """Calculate all hidden states and cell states.

    Args:
        s (~chainer.Variable or None): Initial (hidden & cell) states. If
            ``None`` is specified, ``None`` is passed on to the LSTM for
            both states.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Return:
        (hy, cy): a pair of hidden and cell states at the end of the sequence,
        ys: a hidden state sequence at the last layer
    """
    if len(xs) > 1:
        # Embed every sequence in one call, then cut the result back at the
        # cumulative lengths of all but the last sequence.
        lengths = np.array([len(x) for x in xs[:-1]], dtype=np.int32)
        sections = np.cumsum(lengths)
        embedded = self.embed(F.concat(xs, axis=0))
        xs = F.split_axis(embedded, sections, axis=0)
    else:
        xs = [self.embed(xs[0])]

    h0, c0 = (None, None) if s is None else (s[0], s[1])
    hy, cy, ys = self.lstm(h0, c0, xs)
    return (hy, cy), ys
lstm_encoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def __call__(self, s, xs):
    """Calculate all hidden states and cell states.

    Args:
        s (~chainer.Variable or None): Initial (hidden & cell) states. If
            ``None`` is specified, ``None`` is passed on to the LSTM for
            both states.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Return:
        (hy, cy): a pair of hidden and cell states at the end of the sequence,
        ys: a hidden state sequence at the last layer
    """
    if len(xs) > 1:
        # Embed every sequence in one call, then cut the result back at the
        # cumulative lengths of all but the last sequence.
        lengths = np.array([len(x) for x in xs[:-1]], dtype=np.int32)
        sections = np.cumsum(lengths)
        embedded = self.embed(F.concat(xs, axis=0))
        xs = F.split_axis(embedded, sections, axis=0)
    else:
        xs = [self.embed(xs[0])]

    h0, c0 = (None, None) if s is None else (s[0], s[1])
    hy, cy, ys = self.lstm(h0, c0, xs)
    return (hy, cy), ys
lstm_encoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def __call__(self, s, xs):
    """Calculate all hidden states and cell states.

    Args:
        s (~chainer.Variable or None): Initial (hidden & cell) states. If
            ``None`` is specified, ``None`` is passed on to the LSTM for
            both states.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Return:
        (hy, cy): a pair of hidden and cell states at the end of the sequence,
        ys: a hidden state sequence at the last layer
    """
    if len(xs) > 1:
        # Embed every sequence in one call, then cut the result back at the
        # cumulative lengths of all but the last sequence.
        lengths = np.array([len(x) for x in xs[:-1]], dtype=np.int32)
        sections = np.cumsum(lengths)
        embedded = self.embed(F.concat(xs, axis=0))
        xs = F.split_axis(embedded, sections, axis=0)
    else:
        xs = [self.embed(xs[0])]

    h0, c0 = (None, None) if s is None else (s[0], s[1])
    hy, cy, ys = self.lstm(h0, c0, xs)
    return (hy, cy), ys
def predict(self, input_x):
    """Run the predictor and decode its grid output into box parameters.

    Args:
        input_x: 4-D input batch, either a ``chainer.Variable`` or a raw
            array.

    Returns:
        tuple: ``(box_x, box_y, box_w, box_h, conf, prob)``. Box centres and
        sizes are divided by the grid width/height; ``conf`` went through a
        sigmoid and ``prob`` through a softmax over the class axis.
    """
    raw = input_x.data if isinstance(input_x, chainer.Variable) else input_x
    device = cuda.get_device(raw)
    xp = self.predictor.xp
    # NOTE(review): the flattened source does not show where the ``with``
    # block ends; the whole remainder is kept inside it - confirm.
    with device:
        output = self.predictor(input_x)
        _n, _c, _in_h, _in_w = input_x.shape  # values unused; input must be 4-D
        batch_size, _, grid_h, grid_w = output.shape
        n_boxes = self.predictor.n_boxes

        # Per box: x, y, w, h, conf (one channel each), then the class scores.
        cells = F.reshape(
            output,
            (batch_size, n_boxes, self.predictor.n_classes + 5, grid_h, grid_w))
        x, y, w, h, conf, prob = F.split_axis(cells, (1, 2, 3, 4, 5), axis=2)
        x = F.sigmoid(x)
        y = F.sigmoid(y)
        conf = F.sigmoid(conf)
        # Move the class axis to position 1 for the softmax, then move it back.
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))

        # convert coordinates to those on the image
        cols = np.arange(grid_w, dtype=np.float32)
        rows = np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1)
        anchors = np.array(self.anchors, dtype=np.float32)
        x_shift = xp.asarray(np.broadcast_to(cols, x.shape))
        y_shift = xp.asarray(np.broadcast_to(rows, y.shape))
        w_anchor = xp.asarray(np.broadcast_to(
            np.reshape(anchors[:, 0], (n_boxes, 1, 1, 1)), w.shape))
        h_anchor = xp.asarray(np.broadcast_to(
            np.reshape(anchors[:, 1], (n_boxes, 1, 1, 1)), h.shape))

        box_x = (x + x_shift) / grid_w
        box_y = (y + y_shift) / grid_h
        box_w = F.exp(w) * w_anchor / grid_w
        box_h = F.exp(h) * h_anchor / grid_h
        return box_x, box_y, box_w, box_h, conf, prob
def predict(self, input_x):
    """Run the predictor and decode its grid output into box parameters.

    Args:
        input_x: 4-D input batch, either a ``chainer.Variable`` or a raw
            array.

    Returns:
        tuple: ``(box_x, box_y, box_w, box_h, conf, prob)``. Box centres and
        sizes are divided by the grid width/height; ``conf`` went through a
        sigmoid and ``prob`` through a softmax over the class axis.
    """
    raw = input_x.data if isinstance(input_x, chainer.Variable) else input_x
    device = cuda.get_device(raw)
    xp = self.predictor.xp
    # NOTE(review): the flattened source does not show where the ``with``
    # block ends; the whole remainder is kept inside it - confirm.
    with device:
        output = self.predictor(input_x)
        _n, _c, _in_h, _in_w = input_x.shape  # values unused; input must be 4-D
        batch_size, _, grid_h, grid_w = output.shape
        n_boxes = self.predictor.n_boxes

        # Per box: x, y, w, h, conf (one channel each), then the class scores.
        cells = F.reshape(
            output,
            (batch_size, n_boxes, self.predictor.n_classes + 5, grid_h, grid_w))
        x, y, w, h, conf, prob = F.split_axis(cells, (1, 2, 3, 4, 5), axis=2)
        x = F.sigmoid(x)
        y = F.sigmoid(y)
        conf = F.sigmoid(conf)
        # Move the class axis to position 1 for the softmax, then move it back.
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))

        # convert coordinates to those on the image
        cols = np.arange(grid_w, dtype=np.float32)
        rows = np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1)
        anchors = np.array(self.anchors, dtype=np.float32)
        x_shift = xp.asarray(np.broadcast_to(cols, x.shape))
        y_shift = xp.asarray(np.broadcast_to(rows, y.shape))
        w_anchor = xp.asarray(np.broadcast_to(
            np.reshape(anchors[:, 0], (n_boxes, 1, 1, 1)), w.shape))
        h_anchor = xp.asarray(np.broadcast_to(
            np.reshape(anchors[:, 1], (n_boxes, 1, 1, 1)), h.shape))

        box_x = (x + x_shift) / grid_w
        box_y = (y + y_shift) / grid_h
        box_w = F.exp(w) * w_anchor / grid_w
        box_h = F.exp(h) * h_anchor / grid_h
        return box_x, box_y, box_w, box_h, conf, prob
def makeEmbedBatch(self, xs, reverse=False):
    """Embed a batch of id sequences, optionally reversing each one first.

    Args:
        xs: list of id sequences.
        reverse: when true every sequence is reversed before embedding.

    Returns:
        The embedded batch, split back into one piece per input sequence.
    """
    if reverse:
        id_arrays = [xp.asarray(seq[::-1], dtype=xp.int32) for seq in xs]
    else:
        id_arrays = [xp.asarray(seq, dtype=xp.int32) for seq in xs]

    # Split points are the running totals of all lengths but the last.
    # Computed with NumPy: CuPy does not have cumsum().
    lengths = np.array([len(arr) for arr in id_arrays[:-1]], dtype=np.int32)
    sections = np.cumsum(lengths)
    embedded = self.embed(F.concat(id_arrays, axis=0))
    return F.split_axis(embedded, sections, axis=0)
def vectorize(args, encdec, sent_arr):
    """Encode sentences and return per-sentence mean and variance pieces.

    Args:
        args: unused here.
        encdec: model providing ``vocab.stoi`` and ``encode``.
        sent_arr: list of strings; each is split on single spaces into
            tokens.

    Returns:
        tuple: ``(mu_arr, var_arr)``, the first element of each ``encode``
        output split into ``len(sent_arr)`` pieces along axis 0.
    """
    n_sents = len(sent_arr)
    tt_batch = []
    for sent in sent_arr:
        tt_batch.append([encdec.vocab.stoi(tok) for tok in sent.split(" ")])
    mu_out, var_out = encdec.encode(tt_batch)
    mu_arr = F.split_axis(mu_out[0], n_sents, axis=0)
    var_arr = F.split_axis(var_out[0], n_sents, axis=0)
    return mu_arr, var_arr
def weighted_cross_entropy(p, t, weight_arr, sec_arr, weigh_flag=True):
    """Cross entropy averaged per section, optionally weighted.

    Args:
        p: 2-D score Variable, one row per sample.
        t: target class index for every row of ``p``.
        weight_arr: weights multiplied (transposed) with the per-section
            losses when ``weigh_flag`` is true.
        sec_arr: split points along axis 0 that define the sections.
        weigh_flag: when false the plain sum of the per-section losses is
            returned and ``weight_arr`` is not used.

    Returns:
        A scalar Variable holding the (weighted) loss.

    Note:
        Shapes and intermediate values are printed to stdout, as in the
        original implementation.
    """
    print("p:{}".format(p.data.shape))
    # One-hot mask that selects the target entry in every row.
    b = np.zeros(p.shape, dtype=np.float32)
    b[np.arange(p.shape[0]), t] = 1
    # log_softmax stays finite where softmax underflows to 0; taking
    # log(softmax) there gives inf, and 0 * inf under the mask becomes NaN.
    log_arr = -F.log_softmax(p)
    xent = b * log_arr
    xent = F.split_axis(xent, sec_arr, axis=0)
    print([xent_e.data.shape[0] for xent_e in xent])
    # Total loss of each section divided by its number of rows.
    x_sum = [F.reshape(F.sum(xent_e) / xent_e.data.shape[0], (1, 1))
             for xent_e in xent]
    xent = F.concat(x_sum, axis=0)
    # Each row has a single column, so the max only removes that axis.
    xent = F.max(xent, axis=1) / p.shape[0]
    if not weigh_flag:
        return F.sum(xent)
    print("xent3:{}".format(xent.data.shape))
    wxent = F.matmul(weight_arr, xent, transa=True)
    wxent = F.sum(F.sum(wxent, axis=0), axis=0)
    print("wxent:{}".format(wxent.data))
    return wxent
def sequence_embed(embed, xs):
    """Embed several sequences with a single call to ``embed``.

    The sequences are concatenated along axis 0, embedded together, and the
    result is split back at the cumulative lengths of all but the last
    sequence.
    """
    lengths = list(map(len, xs))
    boundaries = np.cumsum(lengths[:-1])
    flat = embed(F.concat(xs, axis=0))
    return F.split_axis(flat, boundaries, 0)
def sequence_embed(embed, xs):
    """Embed several sequences with a single call to ``embed``.

    The sequences are concatenated along axis 0, embedded together, and the
    result is split back at the cumulative lengths of all but the last
    sequence.
    """
    lengths = list(map(len, xs))
    boundaries = np.cumsum(lengths[:-1])
    flat = embed(F.concat(xs, axis=0))
    return F.split_axis(flat, boundaries, 0)
def translate(self, xs, max_length=100):
    """Greedily decode an output sequence for every source sequence.

    Args:
        xs: list of source id sequences; each is reversed before encoding.
        max_length: number of decoding steps to run.

    Returns:
        list: one id array per input, cut just before its first ``EOS``
        (or left at full length when no ``EOS`` was produced).
    """
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, reversed_xs)
        h, _ = self.encoder(None, exs)
        # Decoding starts from EOS for every sequence in the batch.
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for _step in range(max_length):
            eys = chainer.functions.split_axis(self.embed_y(ys), batch, 0)
            h, ys = self.decoder(h, eys)
            wy = self.W(chainer.functions.concat(ys, axis=0))
            # Take the highest-scoring id as the next input.
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    # One row per sequence, one column per step, on the CPU.
    result = cuda.to_cpu(self.xp.stack(result).T)

    def cut_at_eos(row):
        # Drop the first EOS and everything after it.
        inds = np.argwhere(row == EOS)
        if len(inds) > 0:
            return row[:inds[0, 0]]
        return row

    outs = [cut_at_eos(y) for y in result]
    return outs
def sequence_embed(embed, xs):
    """Embed several sequences with a single call to ``embed``.

    The sequences are concatenated along axis 0, embedded together, and the
    result is split back at the cumulative lengths of all but the last
    sequence.
    """
    lengths = list(map(len, xs))
    boundaries = np.cumsum(lengths[:-1])
    flat = embed(F.concat(xs, axis=0))
    return F.split_axis(flat, boundaries, 0)
def sequence_embed(embed, xs):
    """Embed several sequences with a single call to ``embed``.

    The sequences are concatenated along axis 0, embedded together, and the
    result is split back at the cumulative lengths of all but the last
    sequence.
    """
    lengths = list(map(len, xs))
    boundaries = np.cumsum(lengths[:-1])
    flat = embed(F.concat(xs, axis=0))
    return F.split_axis(flat, boundaries, 0)
def translate(self, xs, max_length=100):
    """Greedily decode an output sequence for every source sequence.

    The sources are encoded twice, once as given and once reversed, and the
    two encoder states are concatenated along axis 2 before decoding.

    Args:
        xs: list of source id sequences.
        max_length: number of decoding steps to run.

    Returns:
        list: one id array per input, cut just before its first ``EOS``
        (or left at full length when no ``EOS`` was produced).
    """
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        backward_xs = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs)
        exs_b = sequence_embed(self.embed_x, backward_xs)
        fx, _ = self.encoder_f(None, exs_f)
        bx, _ = self.encoder_b(None, exs_b)
        h = F.concat([fx, bx], axis=2)
        # Decoding starts from EOS for every sequence in the batch.
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for _step in range(max_length):
            eys = chainer.functions.split_axis(self.embed_y(ys), batch, 0)
            h, ys = self.decoder(h, eys)
            wy = self.W(chainer.functions.concat(ys, axis=0))
            # Take the highest-scoring id as the next input.
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    # One row per sequence, one column per step, on the CPU.
    result = cuda.to_cpu(self.xp.stack(result).T)

    def cut_at_eos(row):
        # Drop the first EOS and everything after it.
        inds = np.argwhere(row == EOS)
        if len(inds) > 0:
            return row[:inds[0, 0]]
        return row

    outs = [cut_at_eos(y) for y in result]
    return outs
def sequence_embed(embed, xs):
    """Embed several sequences with a single call to ``embed``.

    The sequences are concatenated along axis 0, embedded together, and the
    result is split back at the cumulative lengths of all but the last
    sequence.
    """
    lengths = list(map(len, xs))
    boundaries = np.cumsum(lengths[:-1])
    flat = embed(F.concat(xs, axis=0))
    return F.split_axis(flat, boundaries, 0)