def attend(self, query, key, value, mask, minfs=None):
    """
    Input shapes:
        q=(b, units, dec_l), k=(b, units, enc_l),
        v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)
    """
    # Calculate attention scores, masking zero-padded areas with -inf
    pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
    minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype) \
        if minfs is None else minfs
    pre_a = F.where(mask, pre_a, minfs)
    a = F.softmax(pre_a, axis=2)
    # If all values along axis=2 are -inf, softmax yields NaN, so re-mask.
    a = F.where(self.xp.isnan(a.data),
                self.xp.zeros(a.shape, dtype=a.dtype), a)
    reshaped_a = a[:, None]  # (b, 1, dec_l, enc_l)
    # Calculate the weighted sum over the values
    pre_c = F.broadcast_to(reshaped_a, value.shape) * value
    c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_l, 1)
    return c
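The masking pattern above (fill padded positions with -inf before the softmax, then zero out any NaN rows) can be seen in isolation in the following toy sketch; the shapes and values are illustrative only, not taken from the original model:

import numpy as np
import chainer.functions as F

# One batch, two decoder steps, three encoder positions; the last position is padding.
scores = np.random.randn(1, 2, 3).astype(np.float32)
mask = np.array([[[True, True, False],
                  [True, True, False]]])
minfs = np.full(scores.shape, -np.inf, dtype=np.float32)

masked = F.where(mask, scores, minfs)   # padded scores become -inf
weights = F.softmax(masked, axis=2)     # ...and get exactly zero attention weight
print(weights.data)                     # last column is 0 in both rows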
def __call__(self, w, train=True, dpratio=0.5):
    x = self.embed(w)
    self.maybe_init_state(len(x.data), x.data.dtype)
    for i in range(self.num_layers):
        if self.ignore_label is not None:
            # Ignored (padded) inputs embed to all-zero rows; remember where they are.
            enable = (x.data != 0)
        c = F.dropout(self.get_c(i), train=train, ratio=dpratio)
        h = F.dropout(self.get_h(i), train=train, ratio=dpratio)
        x = F.dropout(x, train=train, ratio=dpratio)
        c, h = self.get_l(i)(c, h, x)
        if self.ignore_label is not None:
            # Keep the previous state wherever the input was padding.
            self.set_c(i, F.where(enable, c, self.get_c(i)))
            self.set_h(i, F.where(enable, h, self.get_h(i)))
        else:
            self.set_c(i, c)
            self.set_h(i, h)
        x = self.get_h(i)
    x = F.dropout(x, train=train, ratio=dpratio)
    return self.hy(x)
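When ignore_label is set, the loop above freezes the recurrent state at padded positions: F.where writes the previous c and h back wherever the embedded input is zero. A minimal sketch of that state-freezing step, with made-up shapes and values:

import numpy as np
import chainer.functions as F

prev_h = np.ones((2, 4), dtype=np.float32)       # state carried over from the last step
new_h = np.full((2, 4), 5.0, dtype=np.float32)   # state computed at this step
x = np.array([[1, 1, 1, 1],
              [0, 0, 0, 0]], dtype=np.float32)   # second sample is padding

enable = (x != 0)                                # same test as in the snippet
h = F.where(enable, new_h, prev_h)
print(h.data)                                    # row 0 is updated, row 1 keeps its old state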
def post_decode_once(self, output, state, train=True):
    lengths = state['lengths']
    if self.byte:
        itos = self.vocab.itos
        consumed = self.xp.array([[len(itos(oi)) + 1]
                                  for oi in output.tolist()])
        lengths -= consumed
    else:
        lengths -= 1
    # Clamp the remaining length at zero once a sequence has run out.
    flags = chainer.Variable(lengths.data >= 0, volatile=not train)
    lengths = F.where(flags, lengths, self.zeros)
    state['lengths'] = lengths
    return state
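post_decode_once decrements the remaining output budget (one token, or the byte length of the emitted word) and uses F.where to clamp negative remainders to zero. The clamping step on its own, with hypothetical values:

import numpy as np
import chainer.functions as F

lengths = np.array([[2.0], [-1.0]], dtype=np.float32)   # the second sequence has run out
zeros = np.zeros_like(lengths)

flags = lengths >= 0
clamped = F.where(flags, lengths, zeros)
print(clamped.data)                                     # [[2.], [0.]]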
def _attend(self, p):
    weight = F.batch_matmul(self.source_hiddens, p)
    weight = F.where(self.mask, weight, self.minf)
    attention = F.softmax(weight)
    return attention
def _attend(self, p):
    p = self.xh(p)
    p = F.expand_dims(p, 1)
    p = F.broadcast_to(p, self.shape2)
    h = F.tanh(self.h + p)
    shape3 = (self.batchsize * self.src_len, self.dim_hid)
    h_reshaped = F.reshape(h, shape3)
    weight_reshaped = self.hw(h_reshaped)
    weight = F.reshape(weight_reshaped, (self.batchsize, self.src_len, 1))
    weight = F.where(self.mask, weight, self.minf)
    attention = F.softmax(weight)
    return attention
def check_forward(self, c_data, x_data, y_data):
    c = chainer.Variable(c_data)
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    z = functions.where(c, x, y)
    self.assertEqual(x.data.shape, z.data.shape)
    for i in numpy.ndindex(c.data.shape):
        if c.data[i]:
            self.assertEqual(x.data[i], z.data[i])
        else:
            self.assertEqual(y.data[i], z.data[i])
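The test walks every index and asserts that F.where picks x where the condition holds and y elsewhere, i.e. the same element-wise rule as numpy.where. A standalone check along the same lines, on toy data:

import numpy as np
import chainer.functions as F

c = np.array([[True, False], [False, True]])
x = np.arange(4, dtype=np.float32).reshape(2, 2)
y = -np.ones((2, 2), dtype=np.float32)

z = F.where(c, x, y)
assert np.array_equal(z.data, np.where(c, x, y))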
def __call__(self, x, mask=None):
    x = F.dropout(x, ratio=self.dropout)
    out, pregate = F.split_axis(self.conv(x), 2, axis=1)
    out = out * F.sigmoid(pregate)
    if mask is not None:
        out *= mask
    return out

# TODO: For layers whose output is not directly fed to a gated linear
# unit, initialize weights from N(0, sqrt(1/n_l)), where n_l is the number
# of input connections for each neuron.
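The TODO refers to the initialization scheme from the convolutional seq2seq paper: weights drawn from a normal distribution with standard deviation sqrt(1/n_l), where n_l is the fan-in of the layer. A hedged sketch of that draw for a 1-D convolution weight; the helper name and weight layout are assumptions, not part of the snippet:

import numpy as np

def convs2s_normal_init(out_channels, in_channels, kernel_size, rng=np.random):
    # Fan-in: number of input connections feeding each output neuron.
    n_l = in_channels * kernel_size
    std = np.sqrt(1.0 / n_l)
    return rng.normal(0.0, std, (out_channels, in_channels, kernel_size)).astype(np.float32)

W = convs2s_normal_init(256, 128, 3)
print(W.std())   # roughly sqrt(1 / (128 * 3)) ~= 0.051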
def __accuracy(self, y, t):
    xp = self.xp
    b, c, n = y.data.shape
    v = np.arange(c, dtype=np.float32).reshape((1, -1, 1)).repeat(b, axis=0).repeat(n, axis=2)
    v = Variable(xp.asarray(v), volatile=True)
    # Expected rating: probability-weighted sum of the class indices.
    r = F.sum(v * F.softmax(Variable(y.data, volatile=True)), axis=1)
    c = Variable(t.data >= 0, volatile=True)
    t = Variable(t.data.astype(np.float32), volatile=True)
    # Where the target is missing (t < 0), substitute t itself so the error is zero.
    r = F.where(c, r, t)
    return F.sum(((r - t) * self.rating_unit) ** 2)
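__accuracy decodes an expected rating as the probability-weighted class index, then masks missing targets (t < 0) so they contribute no error: F.where substitutes t itself at those positions, making (r - t) zero there. A toy version of that masking step:

import numpy as np
import chainer.functions as F

r = np.array([3.2, 4.1], dtype=np.float32)    # predicted expected ratings
t = np.array([3.0, -1.0], dtype=np.float32)   # -1 marks a missing target

valid = t >= 0
r_masked = F.where(valid, r, t)               # missing entries copy t, so their error is zero
squared_error = F.sum((r_masked - t) ** 2)
print(squared_error.data)                     # only the first entry contributes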