def squared_distance_matrix(X):
    n = X.shape[0]
    XX = F.sum(X ** 2.0, axis=1)
    distances = -2.0 * F.linear(X, X)
    distances = distances + F.broadcast_to(XX, (n, n))
    distances = distances + F.broadcast_to(F.expand_dims(XX, 1), (n, n))
    return distances
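A minimal usage sketch (not part of the original source) that checks the helper above against a naive pairwise computation; it assumes NumPy is installed and that chainer.functions is imported as F, as the snippet implies:

import numpy as np

X = np.random.randn(4, 3).astype(np.float32)
D = squared_distance_matrix(X)  # chainer.Variable of shape (4, 4)
# naive reference: ||x_i - x_j||^2 for every pair
ref = ((X[:, None, :] - X[None, :, :]) ** 2).sum(axis=2)
print(np.allclose(D.data, ref, atol=1e-5))  # expected: True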
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.ndarray):
            Anchor vectors. Each vector in f must be l2-normalized.
        f_p (chainer.Variable or xp.ndarray):
            Positive vectors. Each vector in f_p must be l2-normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
    # term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))
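An illustrative call (not from the original source); the imports are assumptions about what the snippet above expects to already be in scope, since the page omits them:

import numpy as np
import chainer.functions as F
from chainer import cuda
from chainer.functions import matmul, transpose

f = np.random.randn(8, 16).astype(np.float32)
f_p = np.random.randn(8, 16).astype(np.float32)
f /= np.linalg.norm(f, axis=1, keepdims=True)      # anchors: l2-normalized
f_p /= np.linalg.norm(f_p, axis=1, keepdims=True)  # positives: l2-normalized
loss = angular_mc_loss(f, f_p, alpha=45)
print(float(loss.data))                            # scalar batch loss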
def __call__(self, x, context):
    e = self.embed(context)
    shape = e.shape
    x = F.broadcast_to(x[:, None], (shape[0], shape[1]))
    e = F.reshape(e, (shape[0] * shape[1], shape[2]))
    x = F.reshape(x, (shape[0] * shape[1],))
    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, x, context):
    x = F.broadcast_to(x[:, None], (context.shape[0], context.shape[1]))
    x = F.reshape(x, (context.shape[0] * context.shape[1],))
    context = context.reshape((context.shape[0] * context.shape[1]))
    e = self.rnn.charRNN(context)
    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, x):
    return functions.broadcast_to(x, self.shape)
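For reference, a minimal standalone illustration of F.broadcast_to itself (the array and target shape here are made up; assumes numpy and chainer.functions as F):

import numpy as np
import chainer.functions as F

x = np.arange(3, dtype=np.float32)  # shape (3,)
y = F.broadcast_to(x, (4, 3))       # row repeated 4 times
print(y.shape)                      # (4, 3)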
def __call__(self, x):
    """Normalize input and scale it.

    Args:
        x (chainer.Variable): A variable holding a 4-dimensional array.
            Its :obj:`dtype` is :obj:`numpy.float32`.

    Returns:
        chainer.Variable:
            The shape and :obj:`dtype` are the same as those of the input.
    """
    x = F.normalize(x, eps=self.eps, axis=1)
    scale = F.broadcast_to(self.scale[:, np.newaxis, np.newaxis], x.shape)
    return x * scale
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    h = F.broadcast_to(F.expand_dims(h, axis=2), (batch_size, hidden_size, src_len))
    h = F.swapaxes(h, 1, 2)
    S = F.reshape(F.concat((S, h), axis=2), (batch_size * src_len, 2 * hidden_size))
    a = F.softmax(F.reshape(self.second_layer(F.tanh(self.first_layer(S))), (batch_size, src_len)))
    return a
def term_slop(self, loc, val, bs, nf, train=True):
    """ Compute the slope for each active feature.
    """
    shape = (bs, nf)

    # Reshape all of our constants
    pr_mu = F.broadcast_to(self.slop_mu.b, shape)
    pr_lv = F.broadcast_to(self.slop_lv.b, shape)
    # This is either zero or a very negative number, indicating whether to
    # sample from N(mean, logvar) or to just draw the mean precisely
    if not train:
        pr_lv += self.lv_floor

    # The feature slopes are grouped together so that they all share a
    # common mean. The individual slop_delta_lv values are then shrunk
    # towards zero, which effectively makes features fall back on the
    # group mean.
    sl_mu = F.reshape(self.slop_delta_mu(loc), shape) + pr_mu
    sl_lv = F.reshape(self.slop_delta_lv(loc), shape) + pr_lv
    coef = F.gaussian(sl_mu, sl_lv)
    slop = F.sum(coef * val, axis=1)

    # Calculate divergence between group mean and N(0, 1)
    kld1 = F.gaussian_kl_divergence(self.slop_mu.b, self.slop_lv.b)
    # Calculate divergence of individual delta means and delta vars
    args = (self.slop_delta_mu.W, self.slop_delta_lv.W)
    kld2 = F.gaussian_kl_divergence(*args)
    return slop, kld1 + kld2
def kl_div(mu1, lv1, lv2):
    # KL divergence between the given normal and a zero-mean prior N(0, s2^2),
    # where lv* are log standard deviations:
    # ln(s2) - ln(s1) + (s1^2 + (u1 - u2)^2) / (2 * s2^2) - 0.5
    if len(lv1.shape) == 2:
        lv1 = F.expand_dims(lv1, 0)
        mu1 = F.expand_dims(mu1, 0)
    lv2 = F.broadcast_to(lv2, lv1.shape)
    v12 = F.exp(lv1) ** 2.0
    v22 = F.exp(lv2) ** 2.0
    return lv2 - lv1 + .5 * v12 / v22 + .5 * mu1 ** 2. / v22 - .5
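A quick consistency check (illustrative, not from the original source): with the prior log-std set to zero and lv interpreted as a log standard deviation, summing kl_div should match chainer's gaussian_kl_divergence called with the log-variance 2 * lv; assumes numpy and chainer.functions as F:

import numpy as np
import chainer.functions as F

mu = np.random.randn(5, 3).astype(np.float32)
lv = (0.1 * np.random.randn(5, 3)).astype(np.float32)  # log standard deviations
lv_prior = np.zeros((), dtype=np.float32)               # log-std of a N(0, 1) prior

ours = F.sum(kl_div(mu, lv, lv_prior))
ref = F.gaussian_kl_divergence(mu, 2.0 * lv)            # expects log-variance
print(np.allclose(ours.data, ref.data, atol=1e-4))      # expected: True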
def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
    # Change all of the shapes to form interaction vectors
    shape = (bs, nf * 2, self.n_dim)
    feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
    feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
    if not train:
        feat_lv_vec += self.lv_floor

    # Construct the interaction mean and variance
    # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
    # dot(feat, feat) is (bs, nf)
    ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                      feat_lv_vec + self.feat_delta_lv(iloc))
    jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                      feat_lv_vec + self.feat_delta_lv(jloc))
    # feat is (bs, )
    feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)

    # Compute the KLD for the group mean vector and variance vector
    # KL(N(group mu, group lv) || N(0, hyper_lv))
    # hyper_lv ~ gamma(1, 1)
    kldg = F.sum(kl_div(self.feat_mu_vec.b, self.feat_lv_vec.b,
                        self.hyper_feat_lv_vec.b))
    # Compute deviations from hyperprior
    # KL(N(delta_i, delta_i lv) || N(0, hyper_delta_lv))
    # hyper_delta_lv ~ gamma(1, 1)
    kldi = F.sum(kl_div(self.feat_delta_mu.W, self.feat_delta_lv.W,
                        self.hyper_feat_delta_lv.b))
    # Hyperprior penalty for log(var) ~ Gamma(alpha=1, beta=1)
    # Gamma(log(var) | alpha=1, beta=1) = -log(var)
    # The loss function will attempt to make log(var) as negative as
    # possible, which will in turn make the variance as small as possible
    # The sum just casts a 1D vector to a scalar
    hyperg = -F.sum(self.hyper_feat_lv_vec.b)
    hyperi = -F.sum(self.hyper_feat_delta_lv.b)
    return feat, kldg, kldi, hyperg, hyperi
# updater.py, from the project Semantic-Segmentation-using-Adversarial-Networks (author: oyam)
def _make_dis_input(self, input_img, label_map):
    b = F.broadcast_to(input_img[:, 0, :, :], shape=label_map.shape)
    g = F.broadcast_to(input_img[:, 1, :, :], shape=label_map.shape)
    r = F.broadcast_to(input_img[:, 2, :, :], shape=label_map.shape)
    product_b = label_map * b
    product_g = label_map * g
    product_r = label_map * r
    dis_input = F.concat([product_b, product_g, product_r], axis=1)
    return dis_input
def free_energy(self, v):
    """
    :param v: Variable of shape (batch_size, in_channels, image_height, image_width), the input (training) data
    :return: scalar
    """
    batch_size = v.data.shape[0]
    in_channels = self.in_channels
    real = self.real
    if real == 0:
        '''
        visible layer is 0, 1 (bit)
        vbias_term = 1 * SUM(a(i) * v(i))
        '''
        v_sum = F.sum(v, axis=(2, 3))  # sum over image_height & image_width
        # Originally this should return the sum for each batch element,
        # but it returns a scalar (the sum over batches), since everything
        # is summed at the end anyway.
        vbias_term = F.sum(F.matmul(v_sum, self.conv.a))
        wx_b = self.conv(v)
    else:
        '''
        visible layer takes real values
        vbias_term = 0.5 * SUM((v(i) - a(i)) * (v(i) - a(i)))
        '''
        # TODO: check
        # m = Variable(xp.ones((batch_size, 1), dtype=xp.float32))
        n = F.reshape(self.conv.a, (1, in_channels, 1, 1))
        xp = cuda.get_array_module(n.data)
        std_ch = xp.reshape(self.std, (1, in_channels, 1, 1))
        # v_ = v - F.matmul(m, n)
        v_ = (v - F.broadcast_to(n, v.data.shape)) / std_ch
        vbias_term = F.sum(0.5 * v_ * v_)
        wx_b = self.conv(v / std_ch)

    hidden_term = F.sum(F.log(1 + F.exp(wx_b)))
    # print('vbias = ', vbias_term.data, ', hidden = ', hidden_term.data, 'F.exp(wx_b) = ', F.exp(wx_b).data)
    return - vbias_term - hidden_term
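As a small aside (an illustrative check, not from the original source): the hidden term above, F.log(1 + F.exp(wx_b)), is the softplus function, so it can be verified against chainer's F.softplus; assumes numpy and chainer.functions as F:

import numpy as np
import chainer.functions as F

wx_b = np.random.randn(2, 3).astype(np.float32)
a = F.log(1 + F.exp(wx_b))
b = F.softplus(wx_b)
print(np.allclose(a.data, b.data, atol=1e-5))  # expected: True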
def maximum_entropy_mellowmax(values, omega=1., beta_min=-10, beta_max=10):
    """Maximum entropy mellowmax policy function.

    This function provides a categorical distribution whose expectation
    matches that of the mellowmax function while maximizing its entropy.

    See: http://arxiv.org/abs/1612.05628

    Args:
        values (Variable or ndarray):
            Input values. Mellowmax is taken along the second axis.
        omega (float):
            Parameter of mellowmax.
        beta_min (float):
            Minimum value of beta, used in Brent's algorithm.
        beta_max (float):
            Maximum value of beta, used in Brent's algorithm.

    Returns:
        outputs (Variable)
    """
    xp = chainer.cuda.get_array_module(values)
    mm = mellowmax(values, axis=1)
    # Advantage: Q - mellowmax(Q)
    batch_adv = values - F.broadcast_to(F.expand_dims(mm, 1), values.shape)
    # Move data to CPU because we use Brent's algorithm in scipy
    batch_adv = chainer.cuda.to_cpu(batch_adv.data)
    batch_beta = np.empty(mm.shape, dtype=np.float32)

    # Beta is computed as the root of this function
    def f(y, adv):
        return np.sum(np.exp(y * adv) * adv)

    for idx in np.ndindex(mm.shape):
        idx_full = idx[:1] + (slice(None),) + idx[1:]
        adv = batch_adv[idx_full]
        try:
            beta = scipy.optimize.brentq(
                f, a=beta_min, b=beta_max, args=(adv,))
        except ValueError:
            beta = 0
        batch_beta[idx] = beta

    return F.softmax(xp.expand_dims(xp.asarray(batch_beta), 1) * values)
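The per-state root finding above can be reproduced in isolation; a minimal sketch with made-up advantage values (assumes only numpy and scipy):

import numpy as np
import scipy.optimize

adv = np.array([0.5, -0.2, -0.3], dtype=np.float32)  # hypothetical advantages Q - mellowmax(Q) for one state

def f(b):
    return np.sum(np.exp(b * adv) * adv)  # beta is the root of this function

beta = scipy.optimize.brentq(f, a=-10, b=10)
weights = np.exp(beta * adv)
print(beta, weights / weights.sum())  # maximum-entropy weights for this state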
def __init__(self, n_input_channels, action_size, var,
             n_hidden_layers=0, n_hidden_channels=None,
             min_action=None, max_action=None, bound_mean=False,
             nonlinearity=F.relu, mean_wscale=1):
    self.n_input_channels = n_input_channels
    self.action_size = action_size
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.min_action = min_action
    self.max_action = max_action
    self.bound_mean = bound_mean
    self.nonlinearity = nonlinearity
    if np.isscalar(var):
        self.var = np.full(action_size, var, dtype=np.float32)
    else:
        self.var = var
    layers = []
    if n_hidden_layers > 0:
        # Input to hidden
        layers.append(L.Linear(n_input_channels, n_hidden_channels))
        layers.append(self.nonlinearity)
        for _ in range(n_hidden_layers - 1):
            # Hidden to hidden
            layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
        # The last layer is used to compute the mean
        layers.append(
            L.Linear(n_hidden_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    else:
        # There's only one layer for computing the mean
        layers.append(
            L.Linear(n_input_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    if self.bound_mean:
        layers.append(lambda x: bound_by_tanh(
            x, self.min_action, self.max_action))

    def get_var_array(shape):
        self.var = self.xp.asarray(self.var)
        return self.xp.broadcast_to(self.var, shape)

    layers.append(lambda x: distribution.GaussianDistribution(
        x, get_var_array(x.shape)))
    super().__init__(*layers)
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[:, :, :-pad]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    self.contexts = []
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if t == 0:
            ct = (1 - f) * z
            self.contexts.append(ct)
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if t == 0:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if self.contexts is None:
            ct = (1 - f) * z
            self.contexts = [ct]
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t - T]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def calcAttention(self, h1, hList, aList, encLen, cMBSize, args):
    # If attention is disabled, return h1 unchanged
    if self.attn_mode == 0:
        return h1
    # 1, prepare the decoder-side query for the attention computation
    target1 = self.model.attnIn_L1(h1)  # linear transform of the input
    # (cMBSize, self.hDim) => (cMBSize, 1, self.hDim)
    target2 = chaFunc.expand_dims(target1, axis=1)
    # (cMBSize, 1, self.hDim) => (cMBSize, encLen, self.hDim)
    target3 = chaFunc.broadcast_to(target2, (cMBSize, encLen, self.hDim))
    # target3 = chaFunc.broadcast_to(chaFunc.reshape(
    #     target1, (cMBSize, 1, self.hDim)), (cMBSize, encLen, self.hDim))
    # 2, compute the attention scores
    if self.attn_mode == 1:  # bilinear
        # for bilinear attention, hList1 == hList2 is assumed
        # shape: (cMBSize, encLen)
        aval = chaFunc.sum(target3 * aList, axis=2)
    elif self.attn_mode == 2:  # MLP
        # reshape so it can be fed to attnSum
        t1 = chaFunc.reshape(target3, (cMBSize * encLen, self.hDim))
        # (cMBSize*encLen, self.hDim) => (cMBSize*encLen, 1)
        t2 = self.model.attnSum(chaFunc.tanh(t1 + aList))
        # shape: (cMBSize, encLen)
        aval = chaFunc.reshape(t2, (cMBSize, encLen))
        # aval = chaFunc.reshape(self.model.attnSum(
        #     chaFunc.tanh(t1 + aList)), (cMBSize, encLen))
    else:
        assert 0, "ERROR"
    # 3, normalize the scores with softmax
    cAttn1 = chaFunc.softmax(aval)  # (cMBSize, encLen)
    # 4, compute the context vector from the attention weights
    # (cMBSize, encLen) => (cMBSize, 1, encLen)
    cAttn2 = chaFunc.expand_dims(cAttn1, axis=1)
    # (1, encLen) x (encLen, hDim) matrix product (matmul), repeated cMBSize times
    # => (cMBSize, 1, hDim)
    cAttn3 = chaFunc.batch_matmul(cAttn2, hList)
    # cAttn3 = chaFunc.batch_matmul(chaFunc.reshape(
    #     cAttn1, (cMBSize, 1, encLen)), hList)
    # squeeze the size-1 dimension at axis=1
    context = chaFunc.reshape(cAttn3, (cMBSize, self.hDim))
    # 4, alternative way of computing the context vector from the attention
    # weights using broadcasting, kept here for reference
    # (cMBSize, encLen) => (cMBSize, encLen, 1)
    # cAttn2 = chaFunc.reshape(cAttn1, (cMBSize, encLen, 1))
    # (cMBSize, encLen, 1) => (cMBSize, encLen, hDim)
    # cAttn3 = chaFunc.broadcast_to(cAttn2, (cMBSize, encLen, self.hDim))
    # weighted sum over (cMBSize, encLen, hDim)
    # => (cMBSize, hDim)  # summed over axis=1
    # context = chaFunc.sum(aList * cAttn3, axis=1)
    # 6, merge the context vector with the hidden state
    c1 = chaFunc.concat((h1, context))
    c2 = self.model.attnOut_L2(c1)
    finalH = chaFunc.tanh(c2)
    # finalH = chaFunc.tanh(self.model.attnOut_L2(
    #     chaFunc.concat((h1, context))))
    return finalH  # context