def __call__(self, loc, val, y, train=True):
bs = val.data.shape[0]
pred, kld0, kld1, kld2 = self.forward(loc, val, y, train=train)
# Compute MSE loss
mse = F.mean_squared_error(pred, y)
rmse = F.sqrt(mse) # Only used for reporting
# Now compute the total KLD loss
kldt = kld0 * self.lambda0 + kld1 * self.lambda1 + kld2 * self.lambda2
# Total loss is MSE plus regularization losses
loss = mse + kldt * (1.0 / self.total_nobs)
# Log the errors
logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
'kld2': kld2, 'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
reporter.report(logs, self)
return loss
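For reference, the rmse logged above is just F.sqrt applied to the mean squared error, and the KL term is rescaled by 1/total_nobs so that summing the loss over minibatches approximates the full objective. A minimal standalone sketch with toy arrays (the kld value below is only a placeholder for the weighted KL terms, not something this model computes):

import numpy as np
import chainer.functions as F

pred = np.array([2.5, 0.0, 2.0, 8.0], dtype=np.float32)
y = np.array([3.0, -0.5, 2.0, 7.0], dtype=np.float32)

mse = F.mean_squared_error(pred, y)
rmse = F.sqrt(mse)        # reported for monitoring only, not optimized
kld = 12.0                # placeholder for kld0 * lambda0 + kld1 * lambda1 + ...
total_nobs = 1000
loss = mse + kld * (1.0 / total_nobs)
print(rmse.data, loss.data)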
def __call__(self, loc, val, y, train=True):
bs = val.data.shape[0]
ret = self.forward(loc, val, y, train=train)
pred, kld0, kld1, kldg, kldi, hypg, hypi = ret
# Compute MSE loss
mse = F.mean_squared_error(pred, y)
rmse = F.sqrt(mse) # Only used for reporting
# Now compute the total KLD loss
kldt = kld0 * self.lambda0 + kld1 * self.lambda1
kldt += kldg + kldi + hypg + hypi
# Total loss is MSE plus regularization losses
loss = mse + kldt * (1.0 / self.total_nobs)
# Log the errors
logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
'kldg': kldg, 'kldi': kldi, 'hypg': hypg, 'hypi': hypi,
'hypglv': F.sum(self.hyper_feat_lv_vec.b),
'hypilv': F.sum(self.hyper_feat_delta_lv.b),
'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
reporter.report(logs, self)
return loss
Source file: not_layer_instance_norm_sample.py (project: instance_normalization_chainer, author: crcrpar)
def instance_norm(self, x, gamma=None, beta=None):
    # Per-sample, per-channel mean over the two spatial axes
    mean = F.mean(x, axis=-1)
    mean = F.mean(mean, axis=-1)
    mean = F.broadcast_to(mean[Ellipsis, None, None], x.shape)
    # Variance is the spatial mean of the squared deviations
    var = F.mean(F.squared_difference(x, mean), axis=-1)
    var = F.mean(var, axis=-1)
    var = F.broadcast_to(var[Ellipsis, None, None], x.shape)
    std = F.sqrt(var + 1e-5)
    x_hat = (x - mean) / std
    if gamma is not None:
        gamma = F.broadcast_to(gamma[None, Ellipsis, None, None], x.shape)
        beta = F.broadcast_to(beta[None, Ellipsis, None, None], x.shape)
        return gamma * x_hat + beta
    else:
        return x_hat
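A quick sanity check for this kind of normalization, written as a standalone snippet rather than as a method of the chain (shapes and tolerances are only illustrative): each (sample, channel) slice of the output should come out with roughly zero mean and unit standard deviation.

import numpy as np
import chainer.functions as F

x = np.random.randn(2, 3, 8, 8).astype(np.float32) * 5.0 + 1.0
mean = F.mean(F.mean(x, axis=-1), axis=-1)
mean = F.broadcast_to(mean[Ellipsis, None, None], x.shape)
var = F.mean(F.mean(F.squared_difference(x, mean), axis=-1), axis=-1)
var = F.broadcast_to(var[Ellipsis, None, None], x.shape)
x_hat = ((x - mean) / F.sqrt(var + 1e-5)).data

# Per-(sample, channel) statistics after normalization
assert np.allclose(x_hat.mean(axis=(2, 3)), 0.0, atol=1e-4)
assert np.allclose(x_hat.std(axis=(2, 3)), 1.0, atol=1e-2)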
def lifted_struct_loss(f_a, f_p, alpha=1.0):
"""Lifted struct loss function.
Args:
f_a (~chainer.Variable): Feature vectors used as anchor examples.
    All anchors must belong to mutually different classes.
f_p (~chainer.Variable): Positive examples corresponding to f_a.
    Each example must belong to the same class as the corresponding
    example in f_a.
alpha (~float): The margin parameter.
Returns:
~chainer.Variable: Loss value.
See: `Deep Metric Learning via Lifted Structured Feature Embedding \
<http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/\
Song_Deep_Metric_Learning_CVPR_2016_paper.pdf>`_
"""
assert f_a.shape == f_p.shape, 'f_a and f_p must have same shape.'
n = 2 * f_a.shape[0]  # use shape[0] because len(Variable) returns its size
f = F.vstack((f_a, f_p))
D_sq = squared_distance_matrix(f)
pairs_p = np.arange(n).reshape(2, -1) # indexes of positive pairs
row = []
col = []
for i, j in pairs_p.T:
row.append([i] * (n - 2) + [j] * (n - 2))
col.append(np.tile(np.delete(np.arange(n), (i, j)), 2))
row = np.ravel(row)
col = np.ravel(col)
pairs_n = np.vstack((row, col))
distances_p = F.sqrt(D_sq[pairs_p[0], pairs_p[1]])
distances_n = F.sqrt(D_sq[pairs_n[0], pairs_n[1]])
distances_n = distances_n.reshape((n // 2, -1))
loss_ij = F.logsumexp(alpha - distances_n, axis=1) + distances_p
return F.sum(F.relu(loss_ij) ** 2) / n
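lifted_struct_loss relies on a squared_distance_matrix helper defined elsewhere in the same repository. A minimal usage sketch follows; the helper here is an assumed stand-in with the obvious semantics (pairwise squared Euclidean distances), not the project's own implementation.

import numpy as np
import chainer.functions as F

def squared_distance_matrix(f):
    # Assumed stand-in: (n, n) matrix of pairwise squared Euclidean distances
    n = f.shape[0]
    sq_norms = F.sum(f * f, axis=1)
    dots = F.matmul(f, f, transb=True)
    a = F.broadcast_to(F.reshape(sq_norms, (n, 1)), (n, n))
    b = F.broadcast_to(F.reshape(sq_norms, (1, n)), (n, n))
    return a + b - 2.0 * dots

# Random features purely to exercise the function; in practice f_a holds one
# anchor per class and f_p the matching positives.
f_a = np.random.randn(5, 16).astype(np.float32)
f_p = np.random.randn(5, 16).astype(np.float32)
loss = lifted_struct_loss(f_a, f_p, alpha=1.0)
print(loss.data)  # scalar loss value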
def __init__(self, n_features=None, n_dim=8, lossfun=F.mean_squared_error,
lambda0=1, lambda1=1, lambda2=1, init_bias_mu=0.0,
init_bias_lv=0.0, intx_term=True, total_nobs=1):
self.n_dim = n_dim
self.n_features = n_features
self.lossfun = lossfun
self.lambda0 = lambda0
self.lambda1 = lambda1
self.lambda2 = lambda2
self.intx_term = intx_term
self.total_nobs = total_nobs
# In contrast to the FM model, the slopes and latent vectors
# will have means (mu) and log variances (lv) for each component.
super(VFM, self).__init__(bias_mu=L.Bias(shape=(1,)),
bias_lv=L.Bias(shape=(1,)),
slop_mu=L.Bias(shape=(1, 1)),
slop_lv=L.Bias(shape=(1, 1)),
slop_delta_mu=L.EmbedID(n_features, 1,
ignore_label=-1),
slop_delta_lv=L.EmbedID(n_features, 1,
ignore_label=-1),
feat_mu_vec=L.Bias(shape=(1, 1, n_dim)),
feat_lv_vec=L.Bias(shape=(1, 1, n_dim)),
feat_delta_mu=L.EmbedID(n_features, n_dim,
ignore_label=-1),
feat_delta_lv=L.EmbedID(n_features, n_dim,
ignore_label=-1))
# Xavier-style initialization, scaled down by an extra factor of 1e3
# so the variational deltas start close to zero
c = np.sqrt(n_features * n_dim) * 1e3
d = np.sqrt(n_features) * 1e3
self.feat_delta_mu.W.data[...] = np.random.randn(n_features, n_dim) / c
self.feat_delta_lv.W.data[...] = np.random.randn(n_features, n_dim) / c
self.slop_delta_mu.W.data[...] = np.random.randn(n_features, 1) / d
self.slop_delta_lv.W.data[...] = np.random.randn(n_features, 1) / d
self.bias_mu.b.data[...] *= 0.0
self.bias_mu.b.data[...] += init_bias_mu
self.bias_lv.b.data[...] *= 0.0
self.bias_lv.b.data[...] += init_bias_lv
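The forward pass is not part of this snippet. Given the mu/lv parameterization above, these links are typically consumed through a reparameterized Gaussian draw plus a KL penalty against the standard normal prior; a small sketch under that assumption (the helper names are hypothetical, not from the repository):

import numpy as np
import chainer.functions as F

def sample_gaussian(mu, lv):
    # Reparameterization trick: w = mu + sigma * eps with sigma = exp(0.5 * lv)
    eps = np.random.randn(*mu.shape).astype(np.float32)
    return mu + F.exp(0.5 * lv) * eps

def kl_to_standard_normal(mu, lv):
    # KL( N(mu, exp(lv)) || N(0, 1) ), summed over all elements
    return 0.5 * F.sum(F.exp(lv) + mu * mu - 1.0 - lv)

# e.g. feat = sample_gaussian(self.feat_delta_mu(loc), self.feat_delta_lv(loc))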
def __init__(self, n_features=None, n_dim=8, lossfun=F.mean_squared_error,
lambda0=5e-3, lambda1=5e-3, lambda2=5e-3, init_bias=0.0,
intx_term=True, total_nobs=1):
self.n_dim = n_dim
self.n_features = n_features
self.lossfun = lossfun
self.lambda0 = lambda0
self.lambda1 = lambda1
self.lambda2 = lambda2
self.intx_term = intx_term
self.total_nobs = total_nobs
# These are all the learned weights corresponding
# to the overall bias, slope per feature, and latent
# interaction vector per feature
super(FM, self).__init__(bias=L.Bias(shape=(1,)),
slope=L.EmbedID(n_features, 1),
latent=L.EmbedID(n_features, n_dim))
# Xavier-style weight initialization
c = np.sqrt(n_features * n_dim)
self.latent.W.data[...] = np.random.randn(n_features, n_dim) / c
d = np.sqrt(n_features)
self.slope.W.data[...] = np.random.randn(n_features, 1) / d
self.bias.b.data[...] *= 0.0
self.bias.b.data[...] += init_bias
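The prediction code is not included in this snippet; the latent EmbedID above feeds the standard factorization machine pairwise-interaction term. A small NumPy check of the O(k * n) identity that makes that term cheap to evaluate (toy sizes, unrelated to the link shapes above):

import numpy as np

rng = np.random.RandomState(0)
v = rng.randn(6, 8)   # latent vectors v_i of the active features (n=6, k=8)
x = rng.randn(6)      # the corresponding feature values

# Brute force: sum over pairs i < j of <v_i, v_j> * x_i * x_j
brute = sum(np.dot(v[i], v[j]) * x[i] * x[j]
            for i in range(6) for j in range(i + 1, 6))

# FM identity: 0.5 * sum_k [ (sum_i v_ik x_i)^2 - sum_i (v_ik x_i)^2 ]
vx = v * x[:, None]
fast = 0.5 * np.sum(np.sum(vx, axis=0) ** 2 - np.sum(vx ** 2, axis=0))

assert np.allclose(brute, fast)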
def average_loss(self, h, a, t):
## print F.reshape(t, (-1, 1)).data
## print (h-F.reshape(t, (-1, 1))).data
self.loss = F.sum(abs(h - F.reshape(t, (-1,1))))
## self.loss = F.sqrt(F.sum(F.square(h - F.reshape(t, (-1,1)))))
self.loss /= self.n_patches
if self.n_images > 1:
h = F.split_axis(h, self.n_images, 0)
a = F.split_axis(a, self.n_images, 0)
else:
h, a = [h], [a]
self.y = h
self.a = a
def update_core(self):
xp = self.gen.xp
self._iter += 1
opt_d = self.get_optimizer('dis')
for i in range(self._dis_iter):
d_fake = self.get_fake_image_batch()
d_real = self.get_real_image_batch()
y_fake = self.dis(Variable(d_fake), test=False)
y_real = self.dis(Variable(d_real), test=False)
w1 = F.average(y_fake-y_real)
loss_dis = w1
if self._mode == 'gp':
eta = np.random.rand()
c = (d_real * eta + (1.0 - eta) * d_fake).astype('f')
y = self.dis(Variable(c), test=False, retain_forward=True)
g = xp.ones_like(y.data)
grad_c = self.dis.differentiable_backward(Variable(g))
grad_c_l2 = F.sqrt(F.sum(grad_c**2, axis=(1, 2, 3)))
loss_gp = loss_l2(grad_c_l2, 1.0)
loss_dis += self._lambda_gp * loss_gp
opt_d.zero_grads()
loss_dis.backward()
opt_d.update()
if self._mode == 'clip':
self.dis.clip()
chainer.report({'loss': loss_dis,'loss_w1': w1}, self.dis)
z_in = self.get_latent_code_batch()
x_out = self.gen(Variable(z_in), test=False)
opt_g = self.get_optimizer('gen')
y_fake = self.dis(x_out, test=False)
loss_gen = - F.average(y_fake)
chainer.report({'loss': loss_gen}, self.gen)
opt_g.zero_grads()
loss_gen.backward()
opt_g.update()
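differentiable_backward and loss_l2 are helpers defined in this project's discriminator and loss modules. With a stock Chainer model the same gradient penalty can be written with chainer.grad and double backpropagation; a sketch under that assumption, using a toy linear critic in place of self.dis:

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

dis = L.Linear(32, 1)   # toy critic standing in for self.dis
d_real = np.random.randn(8, 32).astype(np.float32)
d_fake = np.random.randn(8, 32).astype(np.float32)

eta = np.random.rand()
c = chainer.Variable((d_real * eta + (1.0 - eta) * d_fake).astype(np.float32))
y = F.sum(dis(c))
grad_c, = chainer.grad([y], [c], enable_double_backprop=True)
grad_norm = F.sqrt(F.sum(grad_c ** 2, axis=1))
loss_gp = F.mean((grad_norm - 1.0) ** 2)  # pulls the critic's gradient norm toward 1
loss_gp.backward()                        # gradients reach the critic's parameters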
def update_core(self):
xp = self.gen.xp
self._iter += 1
opt_g = self.get_optimizer('gen')
opt_d = self.get_optimizer('dis')
data_z0 = self.get_latent_code_batch()
x_fake0 = self.gen(Variable(data_z0))
data_z1 = self.get_latent_code_batch()
x_fake1 = self.gen(Variable(data_z1))
data_x = self.get_real_image_batch()
x_real = Variable(data_x)
eta = np.random.rand()
x_inter = Variable((data_x * eta + (1.0 - eta) * x_fake0.data).astype('f'))
dis_x_fake0 = self.dis(x_fake0)
dis_x_fake1 = self.dis(x_fake1)
dis_x_real = self.dis(x_real)
loss_gen = loss_l2_norm(dis_x_fake0, dis_x_real) + \
loss_l2_norm(dis_x_fake1, dis_x_real) - \
loss_l2_norm(dis_x_fake0, dis_x_fake1)
#print(loss_gen.data)
chainer.report({'loss': loss_gen}, self.gen)
opt_g.zero_grads()
loss_gen.backward()
opt_g.update()
x_fake0.unchain_backward()
x_fake1.unchain_backward()
loss_surrogate = loss_l2_norm(dis_x_fake0, dis_x_fake1) - \
loss_l2_norm(dis_x_fake0, 0.0) + \
loss_l2_norm(dis_x_real, 0.0) - \
loss_l2_norm(dis_x_real, dis_x_fake1)
dis_x_inter = self.dis(x_inter, retain_forward=True)
g = xp.ones_like(dis_x_inter.data)
t0 = dis_x_inter.data - dis_x_fake1.data
t0_norm = xp.sum(t0**2, axis=(1)) ** 0.5
t1_norm = xp.sum(dis_x_inter.data**2, axis=(1)) ** 0.5
t_g = ((t0.transpose() / t0_norm) - (dis_x_inter.data.transpose()) / t1_norm).transpose()
g = g * t_g
grad = self.dis.differentiable_backward(Variable(g))
grad_l2 = F.sqrt(F.sum(grad**2, axis=(1, 2, 3)))
loss_gp = self._lambda_gp * loss_l2(grad_l2, 1.0)
loss_dis = loss_surrogate + loss_gp
opt_d.zero_grads()
loss_dis.backward()
opt_d.update()
chainer.report({'loss': loss_dis, 'loss_gp': loss_gp}, self.dis)
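loss_l2 and loss_l2_norm above are helpers defined elsewhere in this project. The updater only makes sense if they behave roughly like a mean squared deviation from a target and a batch-averaged Euclidean distance, so assumed stand-ins (not the repository's originals) would look like:

import chainer.functions as F

def loss_l2(h, t):
    # Assumed stand-in: mean squared deviation of h from target t
    return F.mean((h - t) ** 2)

def loss_l2_norm(h, t, axis=1):
    # Assumed stand-in: mean Euclidean distance between h and t over the batch
    return F.mean(F.sqrt(F.sum((h - t) ** 2, axis=axis) + 1e-8))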
def update_core(self):
xp = self.gen.xp
self._iter += 1
opt_g = self.get_optimizer('gen')
opt_d = self.get_optimizer('dis')
data_z = self.get_latent_code_batch()
data_x = self.get_real_image_batch()
x_fake = self.gen(Variable(data_z))
dis_fake = self.dis(x_fake)
loss_gen = loss_func_dcgan_dis_real(dis_fake)
chainer.report({'loss': loss_gen}, self.gen)
opt_g.zero_grads()
loss_gen.backward()
opt_g.update()
x_fake.unchain_backward()
std_data_x = xp.std(data_x, axis=0, keepdims=True)
rnd_x = xp.random.uniform(0, 1, data_x.shape).astype("f")
x_perturbed = Variable(data_x + 0.5*rnd_x*std_data_x)
x_real = Variable(data_x)
dis_real = self.dis(x_real)
dis_perturbed = self.dis(x_perturbed, retain_forward=True)
g = Variable(xp.ones_like(dis_perturbed.data))
grad = self.dis.differentiable_backward(g)
grad_l2 = F.sqrt(F.sum(grad**2, axis=(1, 2, 3)))
loss_gp = self._lambda_gp * loss_l2(grad_l2, 1.0)
loss_dis = loss_func_dcgan_dis_real(dis_real) + \
loss_func_dcgan_dis_fake(dis_fake) + \
loss_gp
opt_d.zero_grads()
loss_dis.backward()
opt_d.update()
chainer.report({'loss': loss_dis, 'loss_gp': loss_gp}, self.dis)
def update_core(self):
xp = self.gen.xp
self._iter += 1
opt_g = self.get_optimizer('gen')
opt_d = self.get_optimizer('dis')
data_z = self.get_latent_code_batch()
data_tag = self.get_fake_tag_batch()
data_x, data_real_tag = self.get_real_image_batch()
x_fake = self.gen(F.concat([Variable(data_z),Variable(data_tag)]))
dis_fake, dis_g_class = self.dis(x_fake)
data_tag[data_tag < 0] = 0.0
loss_g_class = loss_sigmoid_cross_entropy_with_logits(dis_g_class, data_tag)
#print(loss_g_class.data)
loss_gen = self._lambda_adv * loss_func_dcgan_dis_real(dis_fake) + loss_g_class
chainer.report({'loss': loss_gen, 'loss_c': loss_g_class}, self.gen)
opt_g.zero_grads()
loss_gen.backward()
opt_g.update()
x_fake.unchain_backward()
std_data_x = xp.std(data_x, axis=0, keepdims=True)
rnd_x = xp.random.uniform(0, 1, data_x.shape).astype("f")
x_perturbed = Variable(data_x + 0.5*rnd_x*std_data_x)
x_real = Variable(data_x)
dis_real, dis_d_class = self.dis(x_real)
dis_perturbed, _ = self.dis(x_perturbed, retain_forward=True)
g = Variable(xp.ones_like(dis_perturbed.data))
grad = self.dis.differentiable_backward(g)
grad_l2 = F.sqrt(F.sum(grad**2, axis=(1, 2, 3)))
loss_gp = self._lambda_gp * loss_l2(grad_l2, 1.0)
loss_d_class = loss_sigmoid_cross_entropy_with_logits(dis_d_class, data_real_tag)
loss_dis = self._lambda_adv * (loss_func_dcgan_dis_real(dis_real) + \
loss_func_dcgan_dis_fake(dis_fake)) + \
loss_d_class + \
loss_gp
opt_d.zero_grads()
loss_dis.backward()
opt_d.update()
chainer.report({'loss': loss_dis, 'loss_gp': loss_gp, 'loss_c': loss_d_class}, self.dis)
def compute_distance_of_cluster_heads(self):
# list all possible combinations of two cluster heads
num_combination = self.nCr(self.ndim_y, 2)
# a_labels
# [0, 1, 0, 0]
# [0, 0, 1, 0]
# [0, 0, 1, 0]
# [0, 0, 0, 1]
# [0, 0, 0, 1]
# [0, 0, 0, 1]
a_labels = np.zeros((num_combination, self.ndim_y), dtype=np.float32)
for i in range(1, self.ndim_y):
for n in range(i):
j = int(0.5 * i * (i - 1) + n)
a_labels[j, i] = 1
# b_labels
# [1, 0, 0, 0]
# [1, 0, 0, 0]
# [0, 1, 0, 0]
# [1, 0, 0, 0]
# [0, 1, 0, 0]
# [0, 0, 1, 0]
b_labels = np.zeros((num_combination, self.ndim_y), dtype=np.float32)
for i in range(1, self.ndim_y):
for n in range(i):
j = int(0.5 * i * (i - 1) + n)
b_labels[j, n] = 1
xp = self.xp
if xp is not np:
a_labels = cuda.to_gpu(a_labels)
b_labels = cuda.to_gpu(b_labels)
a_vector = self.cluster_head(a_labels)
b_vector = self.cluster_head(b_labels)
distance = functions.sqrt(functions.sum((a_vector - b_vector) ** 2, axis=1))
# clip
distance = functions.clip(distance, 0.0, float(self.cluster_head_distance_threshold))
return distance
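The flat index j = i*(i-1)/2 + n in the loops above enumerates every unordered pair of cluster heads exactly once; a short NumPy check of that construction for ndim_y = 4, the case illustrated in the comments:

import numpy as np
from itertools import combinations

ndim_y = 4
num_combination = ndim_y * (ndim_y - 1) // 2
a_labels = np.zeros((num_combination, ndim_y), dtype=np.float32)
b_labels = np.zeros((num_combination, ndim_y), dtype=np.float32)
for i in range(1, ndim_y):
    for n in range(i):
        j = int(0.5 * i * (i - 1) + n)
        a_labels[j, i] = 1
        b_labels[j, n] = 1

# Each row pairs the head marked in b_labels with the head marked in a_labels
pairs = {(int(b.argmax()), int(a.argmax())) for a, b in zip(a_labels, b_labels)}
assert pairs == set(combinations(range(ndim_y), 2))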
def __init__(self, n_features=None, n_dim=8, lossfun=F.mean_squared_error,
lambda0=1, lambda1=1, lambda2=1, init_bias_mu=0.0,
init_bias_lv=0.0, intx_term=True, total_nobs=1):
self.n_dim = n_dim
self.n_features = n_features
self.lossfun = lossfun
self.lambda0 = lambda0
self.lambda1 = lambda1
self.lambda2 = lambda2
self.intx_term = intx_term
self.total_nobs = total_nobs
# In contrast to the FM model, the slopes and latent vectors
# will have means (mu) and log variances (lv) for each component.
ones_3d = (1, 1, 1)
super(AutoVFM, self).__init__(bias_mu=L.Bias(shape=(1,)),
bias_lv=L.Bias(shape=(1,)),
slop_mu=L.Bias(shape=(1, 1)),
slop_lv=L.Bias(shape=(1, 1)),
slop_delta_mu=L.EmbedID(n_features, 1,
ignore_label=-1),
slop_delta_lv=L.EmbedID(n_features, 1,
ignore_label=-1),
feat_mu_vec=L.Bias(shape=(1, 1, n_dim)),
feat_lv_vec=L.Bias(shape=(1, 1, n_dim)),
hyper_feat_lv_vec=L.Bias(shape=ones_3d),
feat_delta_mu=L.EmbedID(n_features, n_dim,
ignore_label=-1),
feat_delta_lv=L.EmbedID(n_features, n_dim,
ignore_label=-1),
hyper_feat_delta_lv=L.Bias(shape=ones_3d))
# Xavier-style initialization, scaled down by an extra factor of 1e3
# so the variational deltas start close to zero
c = np.sqrt(n_features * n_dim) * 1e3
d = np.sqrt(n_features) * 1e3
self.feat_delta_mu.W.data[...] = np.random.randn(n_features, n_dim) / c
self.feat_delta_lv.W.data[...] = np.random.randn(n_features, n_dim) / c
self.slop_delta_mu.W.data[...] = np.random.randn(n_features, 1) / d
self.slop_delta_lv.W.data[...] = np.random.randn(n_features, 1) / d
self.bias_mu.b.data[...] *= 0.0
self.bias_mu.b.data[...] += init_bias_mu
self.bias_lv.b.data[...] *= 0.0
self.bias_lv.b.data[...] += init_bias_lv