def __call__(self, x):
h = x
for l in self.conv_layers:
h = self.activation(l(h))
# Advantage
batch_size = x.shape[0]
ya = self.a_stream(h)
mean = F.reshape(
F.sum(ya, axis=1) / self.n_actions, (batch_size, 1))
ya, mean = F.broadcast(ya, mean)
ya -= mean
# State value
ys = self.v_stream(h)
ya, ys = F.broadcast(ya, ys)
q = ya + ys
return action_value.DiscreteActionValue(q)
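A minimal standalone sketch (not from this repository; toy shapes and values) of the advantage mean-subtraction done above with F.broadcast:

import numpy as np
import chainer.functions as F

n_actions = 4
ya = np.arange(8, dtype=np.float32).reshape(2, n_actions)   # toy (batch, n_actions) advantages
mean = F.reshape(F.sum(ya, axis=1) / n_actions, (2, 1))     # per-sample mean, (batch, 1)
ya_b, mean_b = F.broadcast(ya, mean)                        # mean copied across the action axis
centered = ya_b - mean_b                                    # each row now sums to zero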
def __call__(self, ws, cs, ls, ts):
h_w = self.emb_word(ws) #_(batchsize, windowsize, word_dim)
h_c = self.emb_char(cs) # (batchsize, windowsize, max_char_len, char_dim)
batchsize, windowsize, _, _ = h_c.data.shape
# (batchsize, windowsize, char_dim)
h_c = F.sum(h_c, 2)
h_c, ls = F.broadcast(h_c, F.reshape(ls, (batchsize, windowsize, 1)))
h_c = h_c / ls
h = F.concat([h_w, h_c], 2)
h = F.reshape(h, (batchsize, -1))
# ys = self.linear1(h)
h = F.relu(self.linear1(h))
h = F.dropout(h, ratio=.5, train=self.train)
ys = self.linear2(h)
loss = F.softmax_cross_entropy(ys, ts)
acc = F.accuracy(ys, ts)
chainer.report({
"loss": loss,
"accuracy": acc
}, self)
return loss
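A toy version of the character-embedding averaging above, with made-up sizes (batch 2, window 3, at most 5 characters, char_dim 4):

import numpy as np
import chainer.functions as F

h_c = np.random.randn(2, 3, 5, 4).astype(np.float32)   # (batchsize, windowsize, max_char_len, char_dim)
ls = np.full((2, 3), 5, dtype=np.float32)               # character counts per word
h_c = F.sum(h_c, 2)                                     # (batchsize, windowsize, char_dim)
h_c, ls = F.broadcast(h_c, F.reshape(ls, (2, 3, 1)))    # counts copied along char_dim
h_c = h_c / ls                                          # mean character embedding per word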
def __call__(self, X, ht_enc):
pad = self._kernel_size - 1
WX = self.W(X)
if pad > 0:
WX = WX[..., :-pad]
Vh = self.V(ht_enc)
# copy Vh
# e.g.
# WX = [[[ 0  1  2]
#        [ 3  4  5]
#        [ 6  7  8]]]
# Vh = [[11, 12, 13]]
#
# Vh, WX = F.broadcast(F.expand_dims(Vh, axis=2), WX)
#
# WX = [[[ 0  1  2]
#        [ 3  4  5]
#        [ 6  7  8]]]
# Vh = [[[11 11 11]
#        [12 12 12]
#        [13 13 13]]]
Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
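A standalone version of the broadcast illustrated in the comment above, with made-up numbers (batch 1, 3 gate channels, 4 time steps):

import numpy as np
import chainer.functions as F

WX = np.arange(12, dtype=np.float32).reshape(1, 3, 4)   # (batch, channels, time)
Vh = np.array([[10., 20., 30.]], dtype=np.float32)      # (batch, channels), one vector per sample
Vh_b, WX_b = F.broadcast(F.expand_dims(Vh, axis=2), WX) # Vh copied along the time axis
preact = WX_b + Vh_b                                    # what gets split into the gates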
def gaussian_likelihood(x, mu, var):
"""Returns likelihood of ``x``, or ``N(x; mu, var)``
Args:
x(float, numpy.ndarray or chainer.Variable): sample data
mu(float or chainer.Variable): mean of Gaussian
var(float): variance of Gaussian
Returns:
chainer.Variable: Variable holding likelihood ``N(x; mu, var)``
whose shape is same as that of ``x``
"""
if numpy.isscalar(x):
x = numpy.array(x)
if isinstance(x, numpy.ndarray):
x = chainer.Variable(x.astype(numpy.float32))
if numpy.isscalar(mu):
mu = numpy.array(mu)
if isinstance(mu, numpy.ndarray):
mu = chainer.Variable(mu.astype(numpy.float32))
x, mu = F.broadcast(x, mu)
return F.exp(-(x - mu) ** 2 / var / 2) / numpy.sqrt(2 * numpy.pi * var)
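Example call, assuming the gaussian_likelihood above is in scope with its imports (numpy, chainer, F); a scalar mu is broadcast against the array x inside the function:

import numpy

x = numpy.array([0.0, 1.0, 2.0], dtype=numpy.float32)
p = gaussian_likelihood(x, mu=1.0, var=0.5)
print(p.data)   # element-wise N(x; 1.0, 0.5), same shape as x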
def __call__(self, x):
minibatch_size = x.shape[0]
activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
activation_ex = F.expand_dims(activation, 3)
activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
diff = activation_ex - activation_ex_t
xp = chainer.cuda.get_array_module(x.data)
eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
sum_diff = F.sum(abs(diff), axis=2)
sum_diff = F.broadcast_to(sum_diff, eps.shape)
abs_diff = sum_diff + eps
minibatch_features = F.sum(F.exp(-abs_diff), 2)
return F.concat((x, minibatch_features), axis=1)
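A cut-down sketch of the pairwise-L1 broadcast used for the minibatch features above (toy shapes: batch 3, 2 kernels, kernel_dim 4):

import numpy as np
import chainer.functions as F

M = np.random.randn(3, 2, 4).astype(np.float32)     # (batch, n_kernels, kernel_dim)
A = F.expand_dims(M, 3)                              # (batch, n_kernels, kernel_dim, 1)
B = F.expand_dims(F.transpose(M, (1, 2, 0)), 0)      # (1, n_kernels, kernel_dim, batch)
A, B = F.broadcast(A, B)                             # both (batch, n_kernels, kernel_dim, batch)
l1 = F.sum(abs(A - B), axis=2)                       # pairwise L1 distances per kernel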
def __call__(self, x):
xp = chainer.cuda.get_array_module(x.data)
batchsize = x.shape[0]
if self.train_weights == False and self.initial_T is not None:
self.T.W.data = self.initial_T
M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
M = F.expand_dims(M, 3)
M_T = F.transpose(M, (3, 1, 2, 0))
M, M_T = F.broadcast(M, M_T)
norm = F.sum(abs(M - M_T), axis=2)
eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
c_b = F.exp(-(norm + 1e6 * eraser))
o_b = F.sum(c_b, axis=2)
if self.train_weights == False:
self.initial_T = self.T.W.data
return F.concat((x, o_b), axis=1)
def convert_unk(embed, cs):
cs = F.broadcast(cs)
cexs = embed(cs)
return (cexs,)
def __call__(self, e1, e2):
ele2 = F.reshape(
F.batch_matmul(e1[:,:,None], e2[:,None,:]), (-1, self.in_size1 * self.in_size2))
res = F.matmul(ele2,
F.reshape(self.W, (self.in_size1 * self.in_size2, self.out_size))) + \
F.matmul(e1, self.V1) + \
F.matmul(e2, self.V2)
res, bias = F.broadcast(res, self.b)
return res + bias
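The final broadcast simply copies the bias vector across the batch; a minimal sketch with dummy sizes (batch 5, out_size 3):

import numpy as np
import chainer.functions as F

res = np.zeros((5, 3), dtype=np.float32)   # (batch, out_size) pre-bias output
b = np.arange(3, dtype=np.float32)         # (out_size,) bias
res_b, bias_b = F.broadcast(res, b)        # bias repeated for every sample in the batch
out = res_b + bias_b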
def predict(self, tokens):
self.train = False
contexts = self.feature_extract(tokens) \
if isinstance(tokens[0], unicode) else tokens
# contexts [(w, c, l), (w, c, l)]
ws, cs, ls = zip(*contexts)
max_cs_size = max(c.shape[1] for c in cs)
new_cs = []
for c in cs:
c = np.pad(c, ((0, 0), (0, max_cs_size - c.shape[1])),
mode='constant', constant_values=-1)
new_cs.append(c)
ws = np.asarray(ws, 'i')
cs = np.asarray(new_cs, 'i')
ls = np.asarray(ls, 'f')
h_w = self.emb_word(ws) #_(batchsize, windowsize, word_dim)
h_c = self.emb_char(cs) # (batchsize, windowsize, max_char_len, char_dim)
batchsize, windowsize, _, _ = h_c.data.shape
# (batchsize, windowsize, char_dim)
h_c = F.sum(h_c, 2)
h_c, ls = F.broadcast(h_c, F.reshape(ls, (batchsize, windowsize, 1)))
h_c = h_c / ls
h = F.concat([h_w, h_c], 2)
h = F.reshape(h, (batchsize, -1))
# ys = self.linear(h)
h = F.relu(self.linear1(h))
h = F.dropout(h, ratio=.5, train=self.train)
ys = self.linear2(h)
return ys.data
def attention_sum(encoding, query):
alpha = F.softmax(F.batch_matmul(encoding, query, transb=True))
alpha, encoding = F.broadcast(alpha[:, :, :, None],
encoding[:, :, None, :])
return F.sum(alpha * encoding, axis=1)
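The same broadcast in isolation, with made-up shapes (batch 2, sequence length 5, hidden size 3):

import numpy as np
import chainer.functions as F

alpha = np.random.rand(2, 5, 1).astype(np.float32)       # attention weights over the sequence
encoding = np.random.randn(2, 5, 3).astype(np.float32)   # encoder states
a, e = F.broadcast(alpha[:, :, :, None], encoding[:, :, None, :])
context = F.sum(a * e, axis=1)                            # weighted sum, shape (2, 1, 3)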
def pre(self, x):
dims = len(x.shape) - 1
if self.kernel_size == 1:
ret = self.W(x)
elif self.kernel_size == 2:
if dims == 2:
xprev = Variable(
self.xp.zeros((self.batch_size, 1, self.in_size),
dtype=np.float32), volatile='AUTO')
xtminus1 = F.concat((xprev, x[:, :-1, :]), axis=1)
else:
xtminus1 = self.x
ret = self.W(x) + self.V(xtminus1)
else:
ret = F.swapaxes(self.conv(
F.swapaxes(x, 1, 2))[:, :, :x.shape[2]], 1, 2)
if not self.attention:
return ret
if dims == 1:
enc = self.encoding[:, -1, :]
else:
enc = self.encoding[:, -1:, :]
return sum(F.broadcast(self.U(enc), ret))
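One detail worth noting in the last line: Python's built-in sum is applied to the tuple returned by F.broadcast, which is just a terse way of adding the broadcasted attention term to ret. A toy equivalent (shapes assumed):

import numpy as np
import chainer.functions as F

ret = np.random.randn(2, 4, 6).astype(np.float32)   # (batch, time, channels)
att = np.random.randn(2, 1, 6).astype(np.float32)   # encoder summary, (batch, 1, channels)
out = sum(F.broadcast(att, ret))                     # == broadcasted att + ret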
def forward_one_step(self, X, ht_enc):
pad = self._kernel_size - 1
WX = self.W(X)[..., -pad-1, None]
Vh = self.V(ht_enc)
Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def check_forward(self, data):
xs = [chainer.Variable(x) for x in data]
bxs = functions.broadcast(*xs)
# When len(xs) == 1, function returns a Variable object
if isinstance(bxs, chainer.Variable):
bxs = (bxs,)
for bx in bxs:
self.assertEqual(bx.data.shape, self.out_shape)
self.assertEqual(bx.data.dtype, self.dtype)
def test_invalid_shape(self):
x_data = numpy.zeros((3, 2, 5), dtype=numpy.int32)
y_data = numpy.zeros((1, 3, 4), dtype=numpy.float32)
x = chainer.Variable(x_data)
y = chainer.Variable(y_data)
with self.assertRaises(type_check.InvalidType):
functions.broadcast(x, y)
def test_invalid_shape_fill(self):
x_data = numpy.zeros((3, 2, 5), dtype=numpy.int32)
y_data = numpy.zeros(4, dtype=numpy.float32)
x = chainer.Variable(x_data)
y = chainer.Variable(y_data)
with self.assertRaises(type_check.InvalidType):
functions.broadcast(x, y)
def test_no_args(self):
with self.assertRaises(type_check.InvalidType):
functions.broadcast()
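The tests above pin down two behaviours worth remembering: with a single input functions.broadcast returns the Variable itself rather than a 1-tuple, and incompatible shapes (or no arguments) raise type_check.InvalidType at call time. A quick check:

import numpy
import chainer
from chainer import functions

x = chainer.Variable(numpy.zeros((2, 3), dtype=numpy.float32))
y = functions.broadcast(x)                 # single argument: a Variable, not a tuple
assert isinstance(y, chainer.Variable)

a, b = functions.broadcast(x, numpy.ones((1, 3), dtype=numpy.float32))
assert a.shape == b.shape == (2, 3)        # the (1, 3) input is expanded along axis 0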
def attention_history(self, dL, cue, train=True):
D = F.concat(dL, axis=0)
D, Cue = F.broadcast(D, cue)
S = self.m(F.tanh(self.W_dm(D) + Cue))
S = F.softmax(F.reshape(S, (1, len(dL))))
pre_v = F.matmul(S, D)
return pre_v
def __call__(self, x):
# Apply a mask to the filters (optional)
if self.filter_mask is not None:
w, m = F.broadcast(self.W, Variable(self.filter_mask))
w = w * m
# w = self.W * Variable(self.filter_mask)
else:
w = self.W
# Transform the filters
# w.shape == (out_channels, in_channels, input_stabilizer_size, ksize, ksize)
# tw.shape == (out_channels, output_stabilizer_size, in_channels, input_stabilizer_size, ksize, ksize)
tw = TransformGFilter(self.inds)(w)
# Fold the transformed filters
tw_shape = (self.out_channels * self.output_stabilizer_size,
self.in_channels * self.input_stabilizer_size,
self.ksize, self.ksize)
tw = F.Reshape(tw_shape)(tw)
# If flat_channels is False, we need to flatten the input feature maps to have a single 1d feature dimension.
if not self.flat_channels:
batch_size = x.data.shape[0]
in_ny, in_nx = x.data.shape[-2:]
x = F.reshape(x, (batch_size, self.in_channels * self.input_stabilizer_size, in_ny, in_nx))
# Perform the 2D convolution
y = F.convolution_2d(x, tw, b=None, stride=self.stride, pad=self.pad, use_cudnn=self.use_cudnn)
# Unfold the output feature maps
# We do this even if flat_channels is True, because we need to add the same bias to each G-feature map
batch_size, _, ny_out, nx_out = y.data.shape
y = F.reshape(y, (batch_size, self.out_channels, self.output_stabilizer_size, ny_out, nx_out))
# Add a bias to each G-feature map
if self.usebias:
bb = F.Reshape((1, self.out_channels, 1, 1, 1))(self.b)
y, b = F.broadcast(y, bb)
y = y + b
# Flatten feature channels if needed
if self.flat_channels:
n, nc, ng, nx, ny = y.data.shape
y = F.reshape(y, (n, nc * ng, nx, ny))
return y
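A cut-down sketch of just the filter-mask broadcast at the top of this call (dummy filter bank and mask; the real shapes depend on the group and kernel size used):

import numpy as np
import chainer
import chainer.functions as F

W = chainer.Variable(np.random.randn(8, 4, 1, 3, 3).astype(np.float32))  # (out, in, stabilizer, k, k)
mask = np.ones((1, 1, 1, 3, 3), dtype=np.float32)                        # e.g. a circular ksize mask
w, m = F.broadcast(W, chainer.Variable(mask))
w = w * m                                                                # masked filters, same shape as W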
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
pad = self._kernel_size - 1
WX = self.W(X)
if pad > 0:
WX = WX[:, :, :-pad]
Vh = self.V(ht_enc)
Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
# f-pooling
Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
Z = functions.tanh(Z)
F = self.zoneout(F)
O = functions.sigmoid(O)
T = Z.shape[2]
# compute ungated hidden states
self.contexts = []
for t in xrange(T):
z = Z[..., t]
f = F[..., t]
if t == 0:
ct = (1 - f) * z
self.contexts.append(ct)
else:
ct = f * self.contexts[-1] + (1 - f) * z
self.contexts.append(ct)
if skip_mask is not None:
assert skip_mask.shape[1] == H_enc.shape[2]
softmax_bias = (skip_mask == 0) * -1e6
# compute attention weights (eq.8)
H_enc = functions.swapaxes(H_enc, 1, 2)
for t in xrange(T):
ct = self.contexts[t]
bias = 0 if skip_mask is None else softmax_bias[..., None] # to skip PAD
mask = 1 if skip_mask is None else skip_mask[..., None] # to skip PAD
alpha = functions.batch_matmul(H_enc, ct) + bias
alpha = functions.softmax(alpha) * mask
alpha = functions.broadcast_to(alpha, H_enc.shape) # copy
kt = functions.sum(alpha * H_enc, axis=1)
ot = O[..., t]
self.ht = ot * self.o(functions.concat((kt, ct), axis=1))
if t == 0:
self.H = functions.expand_dims(self.ht, 2)
else:
self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)
return self.H
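The skip_mask handling above in isolation: padded positions get a large negative bias before the softmax and are zeroed afterwards. A toy example (batch of one, three encoder steps, the last one padding; the cast to float32 is added here to keep dtypes consistent):

import numpy as np
import chainer.functions as F

skip_mask = np.array([[1, 1, 0]], dtype=np.float32)                  # 1 = real token, 0 = PAD
scores = np.random.randn(1, 3, 1).astype(np.float32)                 # unnormalised attention scores
softmax_bias = ((skip_mask == 0) * -1e6).astype(np.float32)          # pushes PAD scores towards -inf
alpha = F.softmax(scores + softmax_bias[..., None]) * skip_mask[..., None]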
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
pad = self._kernel_size - 1
WX = self.W(X)[:, :, -pad-1, None]
Vh = self.V(ht_enc)
Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
# f-pooling
Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
Z = functions.tanh(Z)
F = self.zoneout(F)
O = functions.sigmoid(O)
T = Z.shape[2]
# compute ungated hidden states
for t in xrange(T):
z = Z[..., t]
f = F[..., t]
if self.contexts is None:
ct = (1 - f) * z
self.contexts = [ct]
else:
ct = f * self.contexts[-1] + (1 - f) * z
self.contexts.append(ct)
if skip_mask is not None:
assert skip_mask.shape[1] == H_enc.shape[2]
softmax_bias = (skip_mask == 0) * -1e6
# compute attention weights (eq.8)
H_enc = functions.swapaxes(H_enc, 1, 2)
for t in xrange(T):
ct = self.contexts[t - T]
bias = 0 if skip_mask is None else softmax_bias[..., None] # to skip PAD
mask = 1 if skip_mask is None else skip_mask[..., None] # to skip PAD
alpha = functions.batch_matmul(H_enc, ct) + bias
alpha = functions.softmax(alpha) * mask
alpha = functions.broadcast_to(alpha, H_enc.shape) # copy
kt = functions.sum(alpha * H_enc, axis=1)
ot = O[..., t]
self.ht = ot * self.o(functions.concat((kt, ct), axis=1))
if self.H is None:
self.H = functions.expand_dims(self.ht, 2)
else:
self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)
return self.H