def _context(self, p, fb_mat, fbe_mat):
    batch_size, source_length, _ = fb_mat.data.shape
    # {pe,e}_mat: shape = [batch * srclen, atten]
    pe_mat = F.reshape(
        F.broadcast_to(
            F.expand_dims(self.p_e(p), 1),
            [batch_size, source_length, self.atten_size]),
        [batch_size * source_length, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)
    # a_mat: shape = [batch, srclen]
    a_mat = F.softmax(F.reshape(self.e_a(e_mat), [batch_size, source_length]))
    # q: shape = [batch, 2 * hidden]
    q = F.reshape(
        F.batch_matmul(a_mat, fb_mat, transa=True),
        [batch_size, 2 * self.hidden_size])
    return q
Python usage examples of chainer.functions.broadcast_to()
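As a baseline for the collected snippets below, here is a minimal, self-contained sketch of what chainer.functions.broadcast_to does; the array values are illustrative and not taken from any of the projects.

import numpy as np
import chainer.functions as F
from chainer import Variable

# Broadcast a (3,)-vector to a (2, 3) matrix: the data is repeated along
# the new leading axis, and gradients are summed back over that axis.
x = Variable(np.array([1., 2., 3.], dtype=np.float32))
y = F.broadcast_to(x, (2, 3))
print(y.shape)  # (2, 3)
print(y.data)   # [[1. 2. 3.]
                #  [1. 2. 3.]]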
def attend(self, query, key, value, mask, minfs=None):
    """
    Input shapes:
        q=(b, units, dec_l), k=(b, units, enc_l),
        v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)
    """
    # Calculate attention scores, masking out zero-padded source positions
    pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
    minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype) \
        if minfs is None else minfs
    pre_a = F.where(mask, pre_a, minfs)
    a = F.softmax(pre_a, axis=2)
    # If all values along axis=2 are -inf, softmax yields nan, so re-mask.
    a = F.where(self.xp.isnan(a.data),
                self.xp.zeros(a.shape, dtype=a.dtype), a)
    reshaped_a = a[:, None]  # (b, 1, dec_l, enc_l)
    # Calculate the attention-weighted sum of the values
    pre_c = F.broadcast_to(reshaped_a, value.shape) * value
    c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_l, 1)
    return c
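The masked-softmax step above can be exercised on its own; the sketch below uses made-up shapes and plain NumPy inputs rather than the repository's variables.

import numpy as np
import chainer.functions as F

scores = np.random.randn(2, 3, 4).astype(np.float32)      # (b, dec_l, enc_l)
mask = np.ones(scores.shape, dtype=bool)
mask[:, :, 3:] = False                                     # treat the last source position as padding
neg_inf = np.full(scores.shape, -np.inf, dtype=np.float32)
attn = F.softmax(F.where(mask, scores, neg_inf), axis=2)   # padded positions receive weight 0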
Source: train_word2vec_subword_chainer_input.py (project: vsmlib, author: undertherain)
def __call__(self, x, context):
    x = F.broadcast_to(x[:, None], (context.shape[0], context.shape[1]))
    x = F.reshape(x, (context.shape[0] * context.shape[1],))
    if args.subword == 'rnn':
        context = context.reshape((context.shape[0] * context.shape[1]))
        e = self.rnn.charRNN(context)
    if args.subword == 'none':
        e = self.embed(context)
        e = F.reshape(e, (e.shape[0] * e.shape[1], e.shape[2]))
    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
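The first two lines of the snippet above repeat each target word id across its context slots; a standalone sketch with dummy ids (values invented, not taken from vsmlib) looks like this.

import numpy as np
import chainer.functions as F

x = np.array([3, 7], dtype=np.int32)                # one target word id per example
context = np.zeros((2, 5), dtype=np.int32)          # five context slots per example
x_rep = F.broadcast_to(x[:, None], context.shape)   # (2, 5): each id repeated 5 times
x_flat = F.reshape(x_rep, (context.shape[0] * context.shape[1],))  # (10,)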
def __call__(self, y, a, ht, y_lex):
    y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
    return (y + F.log(y_dict + self.alpha))


#class LinearInterpolationLexicon(chainer.Chain):
#    def __init__(self, hidden_size):
#        super(LinearInterpolationLexicon, self).__init__(
#            perceptron = chainer.links.Linear(hidden_size, 1)
#        )
#
#    def __call__(self, y, a, ht, y_lex):
#        y = F.softmax(y)
#        y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
#        gamma = F.broadcast_to(F.sigmoid(self.perceptron(ht)), y_dict.data.shape)
#        return (gamma * y_dict + (1-gamma) * y)
#
def __call__(self, x1, x2):
    xp = self.xp
    out_size = self.out_size
    batch_size, len1, dim1 = x1.shape
    if not self.nobias[0]:
        x1 = F.concat((x1, xp.ones((batch_size, len1, 1),
                                   dtype=xp.float32)), axis=2)
        dim1 += 1
    len2, dim2 = x2.shape[1:]
    if not self.nobias[1]:
        x2 = F.concat((x2, xp.ones((batch_size, len2, 1),
                                   dtype=xp.float32)), axis=2)
        dim2 += 1
    x1_reshaped = F.reshape(x1, (batch_size * len1, dim1))
    W_reshaped = F.reshape(F.transpose(self.W, (0, 2, 1)),
                           (dim1, out_size * dim2))
    affine = F.reshape(F.matmul(x1_reshaped, W_reshaped),
                       (batch_size, len1 * out_size, dim2))
    biaffine = F.transpose(
        F.reshape(F.batch_matmul(affine, x2, transb=True),
                  (batch_size, len1, out_size, len2)),
        (0, 1, 3, 2))
    if not self.nobias[2]:
        biaffine += F.broadcast_to(self.b, biaffine.shape)
    return biaffine
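The reshape/matmul plumbing of the biaffine layer can be checked in isolation; the sizes and the W stand-in below are invented for illustration only.

import numpy as np
import chainer.functions as F

batch, len1, len2, d1, d2, out = 2, 3, 4, 5, 6, 7
x1 = np.random.randn(batch, len1, d1).astype(np.float32)
x2 = np.random.randn(batch, len2, d2).astype(np.float32)
W = np.random.randn(out, d2, d1).astype(np.float32)          # stand-in for self.W
x1_flat = F.reshape(x1, (batch * len1, d1))
W_flat = F.reshape(F.transpose(W, (0, 2, 1)), (d1, out * d2))
affine = F.reshape(F.matmul(x1_flat, W_flat), (batch, len1 * out, d2))
scores = F.batch_matmul(affine, x2, transb=True)              # (batch, len1 * out, len2)
biaffine = F.transpose(F.reshape(scores, (batch, len1, out, len2)), (0, 1, 3, 2))
print(biaffine.shape)  # (2, 3, 4, 7)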
def term_bias(self, bs, train=True):
    """Compute the overall bias and broadcast it to the batch size."""
    shape = (bs, 1,)
    # The bias is drawn from a Gaussian with the given mean and log variance.
    bs_mu = F.broadcast_to(self.bias_mu.b, shape)
    bs_lv = F.broadcast_to(self.bias_lv.b, shape)
    # Add a very negative log variance so we sample from a very narrow
    # distribution about the mean. Useful for the validation dataset,
    # where we only want to guess the mean.
    if not train:
        bs_lv += self.lv_floor
    bias = F.flatten(F.gaussian(bs_mu, bs_lv))
    # Prior penalty on the bias: KL(N(mu_bias, var_bias) || N(0, 1)).
    kld = F.gaussian_kl_divergence(self.bias_mu.b, self.bias_lv.b)
    return bias, kld
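A minimal sketch of the bias-broadcasting and sampling pattern, with stand-in arrays in place of the model's bias_mu / bias_lv links.

import numpy as np
import chainer.functions as F

bs = 4
bias_mu = np.zeros((1,), dtype=np.float32)        # stand-in for self.bias_mu.b
bias_lv = np.full((1,), -2.0, dtype=np.float32)   # stand-in for self.bias_lv.b (log variance)
mu = F.broadcast_to(bias_mu, (bs, 1))
lv = F.broadcast_to(bias_lv, (bs, 1))
bias = F.flatten(F.gaussian(mu, lv))              # one sample of the same bias per batch row
kld = F.gaussian_kl_divergence(bias_mu, bias_lv)  # KL(N(0, e^-2) || N(0, 1))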
def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
    # Change all of the shapes to form interaction vectors
    shape = (bs, nf * 2, self.n_dim)
    feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
    feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
    if not train:
        feat_lv_vec += self.lv_floor
    # Construct the interaction mean and variance:
    # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim), and
    # dot(feat, feat) is (bs, nf)
    ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                      feat_lv_vec + self.feat_delta_lv(iloc))
    jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                      feat_lv_vec + self.feat_delta_lv(jloc))
    # feat is (bs, )
    feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)
    # KLD for the group mean vector and variance vector
    kld1 = F.gaussian_kl_divergence(self.feat_mu_vec.b, self.feat_lv_vec.b)
    # KLD for the per-feature deviations from the group mean and variance
    kld2 = F.gaussian_kl_divergence(self.feat_delta_mu.W,
                                    self.feat_delta_lv.W)
    return feat, kld1 + kld2
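The interaction term reduces to an inner product over the embedding axis. The `dot` call in the snippet above is a project-specific helper; the sketch below assumes it behaves like an elementwise product followed by a sum over the feature axis, which is an assumption rather than the project's definition.

import numpy as np
import chainer.functions as F

bs, nf, n_dim = 4, 3, 8
ivec = np.random.randn(bs, nf, n_dim).astype(np.float32)
jvec = np.random.randn(bs, nf, n_dim).astype(np.float32)
ival = np.random.randn(bs, nf).astype(np.float32)
jval = np.random.randn(bs, nf).astype(np.float32)
# Inner product over the embedding axis, weighted by the feature values,
# then reduced over features: one scalar interaction term per batch row.
inter = F.sum(F.sum(ivec * jvec, axis=2) * (ival * jval), axis=1)   # (bs,)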
def __call__(self, x):
    minibatch_size = x.shape[0]
    activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
    activation_ex = F.expand_dims(activation, 3)
    activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
    activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
    diff = activation_ex - activation_ex_t
    xp = chainer.cuda.get_array_module(x.data)
    eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
    eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
    sum_diff = F.sum(abs(diff), axis=2)
    sum_diff = F.broadcast_to(sum_diff, eps.shape)
    abs_diff = sum_diff + eps
    minibatch_features = F.sum(F.exp(-abs_diff), 2)
    return F.concat((x, minibatch_features), axis=1)
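The core of minibatch discrimination is a pairwise L1 distance between projected feature rows, built entirely from broadcasting; a small standalone sketch with invented shapes follows.

import numpy as np
import chainer.functions as F

feats = np.random.randn(3, 5).astype(np.float32)   # (batch, kernel_dim)
a = F.expand_dims(feats, 2)                        # (3, 5, 1)
b = F.expand_dims(feats.T, 0)                      # (1, 5, 3)
a, b = F.broadcast(a, b)                           # both (3, 5, 3)
dists = F.sum(abs(a - b), axis=1)                  # (3, 3) pairwise L1 distances
closeness = F.sum(F.exp(-dists), axis=1)           # (3,) similarity feature per sample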
def __call__(self, x):
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]
    if self.train_weights == False and self.initial_T is not None:
        self.T.W.data = self.initial_T
    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)
    norm = F.sum(abs(M - M_T), axis=2)
    eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
    c_b = F.exp(-(norm + 1e6 * eraser))
    o_b = F.sum(c_b, axis=2)
    if self.train_weights == False:
        self.initial_T = self.T.W.data
    return F.concat((x, o_b), axis=1)
def __call__(self, v, h, label):
    v_t = self.vertical_conv_t(v)
    v_s = self.vertical_conv_s(v)
    to_vertical_t = self.v_to_h_conv_t(v_t)
    to_vertical_s = self.v_to_h_conv_s(v_s)
    # v_gate = self.vertical_gate_conv(v)
    # The label bias is added to both the vertical and horizontal convolutions;
    # v_t.shape is used only because all of these maps share the same shape.
    label = F.broadcast_to(F.expand_dims(F.expand_dims(self.label(label), -1), -1), v_t.shape)
    v_t, v_s = v_t + label, v_s + label
    v = F.tanh(v_t) * F.sigmoid(v_s)
    h_t = self.horizontal_conv_t(h)
    h_s = self.horizontal_conv_s(h)
    h_t, h_s = h_t + to_vertical_t + label, h_s + to_vertical_s + label
    h = self.horizontal_output(F.tanh(h_t) * F.sigmoid(h_s))
    return v, h
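The label-conditioning bias above is simply a per-channel vector broadcast over the spatial grid; the sizes below are invented for illustration.

import numpy as np
import chainer.functions as F

b, c, h, w = 2, 8, 4, 4
label_emb = np.random.randn(b, c).astype(np.float32)          # e.g. the output of an embedding lookup
bias = F.broadcast_to(
    F.expand_dims(F.expand_dims(label_emb, -1), -1),           # (b, c, 1, 1)
    (b, c, h, w))
feature_map = np.random.randn(b, c, h, w).astype(np.float32)
conditioned = bias + feature_map                               # the same bias is added at every pixel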
def ordinal_loss(y, mask):
    xp = cuda.get_array_module(y.data)
    volatile = y.volatile
    b, c, n = y.data.shape
    max_y = F.broadcast_to(F.max(y, axis=1, keepdims=True), y.data.shape)
    y = y - max_y
    sum_y = F.broadcast_to(F.expand_dims(F.sum(y, axis=1), 1), y.data.shape)
    down_tri = np.tri(c, dtype=np.float32)
    up_tri = down_tri.T
    w1 = Variable(xp.asarray(down_tri.reshape(c, c, 1, 1)), volatile=volatile)
    w2 = Variable(xp.asarray(up_tri.reshape(c, c, 1, 1)), volatile=volatile)
    h = F.exp(F.expand_dims(y, -1))
    h1 = F.convolution_2d(h, w1)
    h1 = F.convolution_2d(F.log(h1), w1)
    h2 = F.convolution_2d(h, w2)
    h2 = F.convolution_2d(F.log(h2), w2)
    h = F.reshape(h1 + h2, (b, c, n))
    return F.sum((h - sum_y - y) * mask) / b
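The first broadcast_to call above implements the usual max-shift for numerical stability (subtract the largest logit per position before exponentiating); in isolation it looks like this.

import numpy as np
import chainer
import chainer.functions as F

y = chainer.Variable(np.random.randn(2, 5, 7).astype(np.float32))  # (batch, classes, positions)
max_y = F.broadcast_to(F.max(y, axis=1, keepdims=True), y.shape)
y_shifted = y - max_y   # the largest logit per (batch, position) pair is now 0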
def __forward(self, batch_x, batch_t, weight, train=True):
    xp = self.xp
    x = Variable(xp.asarray(batch_x), volatile=not train)
    t = Variable(xp.asarray(batch_t), volatile=not train)
    y = self.net(x, train=train)
    b, c, n = y.data.shape
    mask = Variable(xp.asarray(
        np.broadcast_to(weight.reshape(-1, 1, 1), (b, c, n)) *
        loss_mask(batch_t, self.net.rating_num)), volatile=not train)
    if self.ordinal_weight == 0:
        loss = F.sum(-F.log_softmax(y) * mask) / b
    elif self.ordinal_weight == 1:
        loss = ordinal_loss(y, mask)
    else:
        loss = ((1 - self.ordinal_weight) * F.sum(-F.log_softmax(y) * mask) / b
                + self.ordinal_weight * ordinal_loss(y, mask))
    acc = self.__accuracy(y, t)
    return loss, acc
def propup(self, vis):
    """
    Propagate the visible units' activations upwards to the hidden units (Eq. 7).

    :param vis: Variable, shape (batch_size, in_channels, image_height, image_width)
        - the given v_sample
    :return: Variable, shape (batch_size, out_channels, image_height_out, image_width_out)
        - probability for each hidden unit to be h_i = 1
    """
    # conv.W: (out_channels, in_channels, filter_height=ksize, filter_width=ksize)
    # conv.b: (out_channels,)
    if self.real == 0:
        pre_sigmoid_activation = self.conv(vis)
    else:
        pre_sigmoid_activation = self.conv(vis / self.std_ch)
    # Equivalent dense form:
    # F.matmul(vis, self.conv.W, transb=True) + F.broadcast_to(self.conv.b, (vis.data.shape[0], self.n_hidden))
    return F.sigmoid(pre_sigmoid_activation)
def propdown(self, hid):
    """
    Propagate the hidden units' activations downwards to the visible units.

    :param hid: Variable, shape (batch_size, out_channels, image_height_out, image_width_out)
        - the given h_sample
    :return: Variable, shape (batch_size, in_channels, image_height, image_width)
        - probability for each visible unit to be v_j = 1
    """
    batch_size = hid.data.shape[0]
    if self.real == 0:
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
        # Equivalent dense form:
        # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
        v_mean = F.sigmoid(pre_sigmoid_activation)
        # print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
        # print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
        # print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
        # print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
        # print('pre_sigmoid_activation', F.sum(pre_sigmoid_activation).data)
        # print('v_mean', v_mean.data.shape)
        # print('v_mean sum', F.sum(v_mean).data)
        # print('hid', hid.data.shape)
    else:
        # TODO: check
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
    return v_mean
def reconstruct(self, v):
    """
    :param v: Variable, shape (batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable, shape (batch_size, in_channels, image_height, image_width)
    """
    batch_size = v.data.shape[0]
    xp = cuda.get_array_module(v.data)
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # Equivalent dense form:
    # h = F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize - 1))
    # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
    return reconstructed_v
def clip_actions(actions, min_action, max_action):
    min_actions = F.broadcast_to(min_action, actions.shape)
    max_actions = F.broadcast_to(max_action, actions.shape)
    return F.maximum(F.minimum(actions, max_actions), min_actions)
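A usage sketch with hypothetical action bounds; the per-dimension bounds broadcast across the batch dimension.

import numpy as np

actions = np.array([[-2.0, 0.3], [1.7, -0.1]], dtype=np.float32)
low = np.array([-1.0, -1.0], dtype=np.float32)
high = np.array([1.0, 1.0], dtype=np.float32)
clipped = clip_actions(actions, low, high)
print(clipped.data)   # [[-1.   0.3]
                      #  [ 1.  -0.1]]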
def compute_mean_and_var(self, x):
    h = x
    for layer in self.hidden_layers:
        h = self.nonlinearity(layer(h))
    mean = self.mean_layer(h)
    if self.bound_mean:
        mean = bound_by_tanh(mean, self.min_action, self.max_action)
    var = F.broadcast_to(F.softplus(self.var_layer(h)), mean.shape) + \
        self.min_var
    return mean, var
def __call__(self, x):
    mean = self.hidden_layers(x)
    var = F.broadcast_to(
        F.softplus(self.var_param),
        mean.shape)
    return distribution.GaussianDistribution(mean, var)
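The state-independent variance head boils down to a softplus over a single learned parameter row, broadcast across the batch; the parameter values below are placeholders.

import numpy as np
import chainer.functions as F

var_param = np.zeros((1, 3), dtype=np.float32)     # stand-in for self.var_param, one entry per action dim
mean = np.random.randn(4, 3).astype(np.float32)    # (batch, action_dim), as if from hidden_layers(x)
var = F.broadcast_to(F.softplus(var_param), mean.shape)
print(var.shape)   # (4, 3); softplus(0) = log(2) in every entry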
Source: not_layer_instance_norm_sample.py (project: instance_normalization_chainer, author: crcrpar)
def instance_norm(self, x, gamma=None, beta=None):
    # Per-(sample, channel) mean over the spatial dimensions.
    mean = F.mean(x, axis=-1)
    mean = F.mean(mean, axis=-1)
    mean = F.broadcast_to(mean[Ellipsis, None, None], x.shape)
    # Per-(sample, channel) variance over the spatial dimensions.
    var = F.mean(F.squared_difference(x, mean), axis=-1)
    var = F.mean(var, axis=-1)
    var = F.broadcast_to(var[Ellipsis, None, None], x.shape)
    std = F.sqrt(var + 1e-5)
    x_hat = (x - mean) / std
    if gamma is not None:
        gamma = F.broadcast_to(gamma[None, Ellipsis, None, None], x.shape)
        beta = F.broadcast_to(beta[None, Ellipsis, None, None], x.shape)
        return gamma * x_hat + beta
    else:
        return x_hat
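The per-(sample, channel) statistics can be sanity-checked directly; the sketch below just mirrors the mean computation above on random data.

import numpy as np
import chainer.functions as F

x = np.random.randn(2, 3, 8, 8).astype(np.float32)    # (batch, channels, H, W)
mean = F.mean(F.mean(x, axis=-1), axis=-1)             # (2, 3): one mean per sample and channel
mean_map = F.broadcast_to(mean[Ellipsis, None, None], x.shape)
print(mean_map.shape)   # (2, 3, 8, 8)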
def prepare_decoding(self, state, lengths, train=True):
    state = super().prepare_decoding(state, lengths, train=train)
    x = state['x']
    h = state['h']
    c = F.broadcast_to(self.encoder.c0, (self.batchsize, self.dim_hid))
    lengths = lengths.astype(np.float32)
    lengths = lengths.reshape((self.batchsize, 1))
    c = c * lengths
    return {'x': x, 'c': c, 'h': h}
def _attend(self, p):
    p = self.xh(p)
    p = F.expand_dims(p, 1)
    p = F.broadcast_to(p, self.shape2)
    h = F.tanh(self.h + p)
    shape3 = (self.batchsize * self.src_len, self.dim_hid)
    h_reshaped = F.reshape(h, shape3)
    weight_reshaped = self.hw(h_reshaped)
    weight = F.reshape(weight_reshaped, (self.batchsize, self.src_len, 1))
    weight = F.where(self.mask, weight, self.minf)
    attention = F.softmax(weight)
    return attention
def __call__(self, x):
    return functions.broadcast_to(x, self.shape)
def setUp(self):
    self.x1 = numpy.random.uniform(
        .5, 1, (batch_size, m, k)).astype(numpy.float32)
    self.x2 = numpy.random.uniform(
        .5, 1, (1, k, n)).astype(numpy.float32)
    self.gy = numpy.random.uniform(
        -1, 1, (batch_size, m, n)).astype(numpy.float32)
    self.op = lambda x, y: F.batch_matmul(
        x, F.broadcast_to(y, (batch_size, k, n)))
    self.forward_answer = numpy.array([
        numpy.dot(self.x1[i], self.x2[0])
        for i in six.moves.range(batch_size)])
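The operation under test composes broadcast_to with batch_matmul; as a standalone check with small made-up sizes:

import numpy as np
import chainer.functions as F

batch_size, m, k, n = 2, 3, 4, 5
x1 = np.random.rand(batch_size, m, k).astype(np.float32)
x2 = np.random.rand(1, k, n).astype(np.float32)
y = F.batch_matmul(x1, F.broadcast_to(x2, (batch_size, k, n)))
print(y.shape)   # (2, 3, 5), i.e. one (m, n) product per batch element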
def setUp(self):
    self.x1 = numpy.random.uniform(
        .5, 1, (batch_size, m, k)).astype(numpy.float32)
    self.x2 = numpy.random.uniform(
        .5, 1, (k, n)).astype(numpy.float32)
    self.gy = numpy.random.uniform(
        -1, 1, (batch_size, m, n)).astype(numpy.float32)
    self.op = lambda x, y: F.batch_matmul(
        x, F.broadcast_to(F.expand_dims(y, 0), (batch_size, k, n)))
    self.forward_answer = numpy.array([
        numpy.dot(self.x1[i], self.x2)
        for i in six.moves.range(batch_size)])
def check_forward(self, data):
    x = chainer.Variable(data)
    bx = functions.broadcast_to(x, self.out_shape)
    self.assertEqual(bx.data.shape, self.out_shape)
def test_type_check(self):
    x = chainer.Variable(self.data)
    with self.assertRaises(type_check.InvalidType):
        functions.broadcast_to(x, self.out_shape)