def __call__(self, y, a, ht, y_lex):
    # Attention-weighted lexicon probabilities over the target vocabulary.
    y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
    # Bias the decoder logits with the log of the lexicon probability;
    # self.alpha keeps the argument of the log away from zero.
    return y + F.log(y_dict + self.alpha)
#class LinearInterpolationLexicon(chainer.Chain):
# def __init__(self, hidden_size):
# super(LinearInterpolationLexicon, self).__init__(
# perceptron = chainer.links.Linear(hidden_size, 1)
# )
#
# def __call__(self, y, a, ht, y_lex):
# y = F.softmax(y)
# y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
# gamma = F.broadcast_to(F.sigmoid(self.perceptron(ht)), y_dict.data.shape)
# return (gamma * y_dict + (1-gamma) * y)
#
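# Note: the active __call__ above biases the decoder logits with log(p_lex + alpha),
# while the commented-out LinearInterpolationLexicon instead mixes the lexicon
# distribution and the model softmax with a learned sigmoid gate. These appear to
# correspond to the "bias" and "linear interpolation" lexicon-integration methods
# of Arthur et al. (2016), "Incorporating Discrete Translation Lexicons into
# Neural Machine Translation".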
Example source code for Python log()
def listnet(x, t):
"""
    The Top-1 approximated ListNet loss as in Cao et al. (2007), "Learning to
    Rank: From Pairwise Approach to Listwise Approach".

:param x: The activation of the previous layer
:param t: The target labels
:return: The loss
"""
# ListNet top-1 reduces to a softmax and simple cross entropy
st = F.softmax(t, axis=0)
sx = F.softmax(x, axis=0)
return -F.mean(st * F.log(sx))
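# A minimal usage sketch (hypothetical, not from the original repository): score
# eight documents of a single query with a toy linear scorer and compare them
# against graded relevance labels.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

scorer = L.Linear(5, 1)                                   # toy relevance scorer
feats = np.random.rand(8, 5).astype(np.float32)           # 8 documents, 5 features each
rels = chainer.Variable(np.random.rand(8, 1).astype(np.float32))
loss = listnet(scorer(feats), rels)
loss.backward()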
def sample_discrete_actions(batch_probs):
"""Sample a batch of actions from a batch of action probabilities.
Args:
batch_probs (ndarray): batch of action probabilities BxA
Returns:
ndarray consisting of sampled action indices
"""
xp = chainer.cuda.get_array_module(batch_probs)
return xp.argmax(
xp.log(batch_probs) + xp.random.gumbel(size=batch_probs.shape),
axis=1).astype(np.int32, copy=False)
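# sample_discrete_actions() is the Gumbel-max trick: argmax(log p + Gumbel noise)
# is distributed according to the categorical distribution p. A quick hypothetical
# sanity check (assumes numpy/chainer are imported as in the snippet above):
probs = np.tile(np.array([[0.1, 0.2, 0.7]], dtype=np.float32), (10000, 1))
actions = sample_discrete_actions(probs)
print(np.bincount(actions) / float(len(actions)))   # roughly [0.1, 0.2, 0.7]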
def log_prob(self, x):
"""Compute log p(x).
Returns:
chainer.Variable
"""
raise NotImplementedError()
def all_log_prob(self):
with chainer.force_backprop_mode():
if self.min_prob > 0:
return F.log(self.all_prob)
else:
return F.log_softmax(self.beta * self.logits)
def all_log_prob(self):
with chainer.force_backprop_mode():
return F.log(self.all_prob)
def __init__(self, mean, var):
self.mean = _wrap_by_variable(mean)
self.var = _wrap_by_variable(var)
self.ln_var = F.log(var)
def entropy(self):
# Differential entropy of Gaussian is:
# 0.5 * (log(2 * pi * var) + 1)
# = 0.5 * (log(2 * pi) + log var + 1)
with chainer.force_backprop_mode():
return 0.5 * self.mean.data.shape[1] * (np.log(2 * np.pi) + 1) + \
0.5 * F.sum(self.ln_var, axis=1)
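# For a D-dimensional diagonal Gaussian the differential entropy is
# 0.5 * D * (log(2 * pi) + 1) + 0.5 * sum_d ln_var_d, which is what entropy()
# computes above. Quick standalone check for D = 1 with unit variance:
import math
print(0.5 * (math.log(2 * math.pi) + 1))   # ~= 1.4189 nats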
def weighted_cross_entropy(p, t, weight_arr, sec_arr, weigh_flag=True):
    # One-hot encode the target indices.
    b = np.zeros(p.shape, dtype=np.float32)
    b[np.arange(p.shape[0]), t] = 1
    # Per-example cross entropy: -log softmax(p) picked out at the target index.
    soft_arr = F.softmax(p)
    log_arr = -F.log(soft_arr)
    xent = b * log_arr
    # Split into sections (sec_arr gives the split positions) and average within each section.
    xent = F.split_axis(xent, sec_arr, axis=0)
    x_sum = [F.reshape(F.sum(xent_e) / xent_e.data.shape[0], (1, 1)) for xent_e in xent]
    xent = F.concat(x_sum, axis=0)
    xent = F.max(xent, axis=1) / p.shape[0]
    if not weigh_flag:
        return F.sum(xent)
    # Weight each section's loss and sum the result.
    wxent = F.matmul(weight_arr, xent, transa=True)
    wxent = F.sum(F.sum(wxent, axis=0), axis=0)
    return wxent
def test_log_forward_cpu(self):
self.check_forward_cpu(F.log, numpy.log)
def test_log_forward_gpu(self):
self.check_forward_gpu(F.log, numpy.log)
def test_log_backward_cpu(self):
self.check_backward_cpu(F.log)
def test_log_backward_gpu(self):
self.check_backward_gpu(F.log)
def test_log(self):
self.assertEqual(F.Log().label, 'log')
def decode(self):
    # Ensemble decoding: sum the member models' log-probabilities, apply the
    # normalization constant, and exponentiate back to a probability distribution.
    arr_sum = None
    a = None
for i, model in enumerate(self.models):
output = model.chainer_model.decode()
output.y = F.log(output.y)
if i == 0:
arr_sum = output.y
if hasattr(output, "a"): a = output.a
else:
arr_sum += output.y
prob = F.exp(F.scale(arr_sum, nmtrain.environment.Variable(self.normalization_constant)))
return nmtrain.models.decoders.Output(y=prob, a=a)
def kld(self, vec_true, vec_compare):
ind = vec_true.data * vec_compare.data > 0
ind_var = chainer.Variable(ind)
include_nan = vec_true * F.log(vec_true / vec_compare)
z = chainer.Variable(np.zeros((len(ind), 1), dtype=np.float32))
# return np.nansum(vec_true * np.log(vec_true / vec_compare))
return F.sum(F.where(ind_var, include_nan, z))
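# Note on the masking above: positions where either distribution has zero mass
# would yield NaN or inf from the log or the division, so those terms are
# replaced with 0 via F.where before summing, mirroring the commented-out
# np.nansum version.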
def listwise_cost(self, list_ans, list_pred):
return - np.sum(self.topkprob(list_ans) * np.log(self.topkprob(list_pred)))
def loss_D(self, real_D, fake_D):
batch_size, _, h, w = real_D.shape
loss = - F.sum(F.log(real_D + self.eps) + F.log(1 - fake_D + self.eps)) / (batch_size*h*w)
chainer.report({'loss': loss}, self.D)
return loss
def loss_G(self, real_B, fake_B, fake_D):
loss_l1 = F.mean_absolute_error(real_B, fake_B)
chainer.report({'loss_l1': loss_l1}, self.G)
batch_size, _, h, w = fake_D.shape
loss_D = - F.sum(F.log(fake_D + self.eps)) / (batch_size*h*w)
chainer.report({'loss_D': loss_D}, self.G)
loss = loss_D + self.lambd*loss_l1
chainer.report({'loss': loss}, self.G)
return loss
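# loss_D and loss_G implement the usual non-saturating GAN objective plus an L1
# term, averaged per pixel. A standalone sketch of the same arithmetic (eps and
# lambd are hypothetical stand-ins for the attributes used above):
import numpy as np
import chainer.functions as F

def gan_losses_sketch(real_D, fake_D, real_B, fake_B, eps=1e-7, lambd=100.0):
    b, _, h, w = real_D.shape
    d_loss = -F.sum(F.log(real_D + eps) + F.log(1 - fake_D + eps)) / (b * h * w)
    g_loss = -F.sum(F.log(fake_D + eps)) / (b * h * w) \
        + lambd * F.mean_absolute_error(real_B, fake_B)
    return d_loss, g_loss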
def __call__(self, x_i, x_j, t_i, t_j):
s_i = self.predictor(x_i)
s_j = self.predictor(x_j)
s_diff = s_i - s_j
if t_i.data > t_j.data:
S_ij = 1
elif t_i.data < t_j.data:
S_ij = -1
else:
S_ij = 0
self.loss = (1 - S_ij) * s_diff / 2. + F.log(1 + F.exp(-s_diff))
return self.loss
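# This is the RankNet pairwise cost (Burges et al., 2005): for scores s_i, s_j and
# a pair label S_ij in {-1, 0, 1},
#   C = (1 - S_ij) * (s_i - s_j) / 2 + log(1 + exp(-(s_i - s_j))).
# Quick standalone check of the formula for S_ij = 1 and s_i - s_j = 2:
import numpy as np
print(np.log1p(np.exp(-2.0)))   # the remaining log term, ~= 0.1269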
def __call__(self, x, test=False):
    # Additive Gaussian noise with variance 0.09 (std 0.3) is injected at the
    # input and before every batch-normalized hidden layer.
    def noise(width):
        mu = chainer.Variable(xp.zeros((batchsize, width), dtype=np.float32))
        ln_var = chainer.Variable(xp.full((batchsize, width), np.log(0.09), dtype=np.float32))
        return F.gaussian(mu, ln_var)

    x = x + noise(784)
    h1 = F.leaky_relu(self.bn0(self.l0(x) + noise(1000), test), slope=0.1)
    h2 = F.leaky_relu(self.bn1(self.l1(h1) + noise(500), test), slope=0.1)
    h3 = F.leaky_relu(self.bn2(self.l2(h2) + noise(250), test), slope=0.1)
    h4 = F.leaky_relu(self.bn3(self.l3(h3) + noise(250), test), slope=0.1)
    h5 = F.leaky_relu(self.bn4(self.l4(h4) + noise(250), test), slope=0.1)
    return F.softmax(self.l5(h5))
def d_entropy1(y):
    # Entropy of the batch-averaged prediction (the marginal class distribution).
    y1 = F.sum(y, axis=0) / batchsize
    y2 = F.sum(-y1 * F.log(y1))
    return y2

def d_entropy2(y):
    # Average per-sample entropy: mean over the batch of -sum_c y_c * log(y_c).
    y1 = -y * F.log(y)
    y2 = F.sum(y1) / batchsize
    return y2
# Setup optimizer
def free_energy(self, v):
"""
    :param v: Variable of shape (batch_size, in_channels, image_height, image_width) - input data (training data)
:return: scalar
"""
batch_size = v.data.shape[0]
in_channels = self.in_channels
real = self.real
if real == 0:
'''
visible layer is 0, 1 (bit)
vbias_term = 1 * SUM(a(i) * v(i))
'''
v_sum = F.sum(v, axis=(2, 3)) # sum over image_height & image_width
        # Strictly this should be a per-sample sum, but the scalar sum over the
        # batch is fine here because everything is summed again at the end.
vbias_term = F.sum(F.matmul(v_sum, self.conv.a))
wx_b = self.conv(v)
else:
'''
visible layer takes real value
vbias_term = 0.5 * SUM((v(i)-a(i)) * (v(i) - a(i)))
'''
#TODO: check
#m = Variable(xp.ones((batch_size, 1), dtype=xp.float32))
n = F.reshape(self.conv.a, (1, in_channels, 1, 1))
xp = cuda.get_array_module(n.data)
std_ch = xp.reshape(self.std, (1, in_channels, 1, 1))
#v_ = v - F.matmul(m, n)
v_ = (v - F.broadcast_to(n, v.data.shape)) / std_ch
vbias_term = F.sum(0.5 * v_ * v_)
wx_b = self.conv(v / std_ch)
hidden_term = F.sum(F.log(1 + F.exp(wx_b)))
# print('vbias = ', vbias_term.data, ', hidden = ', hidden_term.data, 'F.exp(wx_b) = ', F.exp(wx_b).data)
return - vbias_term - hidden_term
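# log(1 + exp(x)) is the softplus function; Chainer's F.softplus computes it in a
# numerically stable way, so hidden_term above could equivalently be written as
# F.sum(F.softplus(wx_b)). Standalone check:
import numpy as np
import chainer.functions as F
wx = np.array([[-2.0, 0.0, 3.0]], dtype=np.float32)
print(F.sum(F.log(1 + F.exp(wx))).data, F.sum(F.softplus(wx)).data)   # both ~= 3.869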
def sample_ax_y_gumbel(self, a, x, temperature=10, test=False):
a = self.to_variable(a)
x = self.to_variable(x)
batchsize = self.get_batchsize(x)
log_q_y = self.q_y_ax(a, x, test=test)
eps = 1e-16
u = np.random.uniform(0, 1, log_q_y.shape).astype(x.dtype)
g = self.to_variable(-np.log(-np.log(u + eps) + eps))
sampled_y = F.softmax((log_q_y + g) / temperature)
return sampled_y
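# This is the Gumbel-softmax (Concrete) relaxation: g = -log(-log(u)) with
# u ~ Uniform(0, 1) is Gumbel noise, and softmax((log q + g) / temperature)
# approaches a one-hot sample from the categorical distribution q as the
# temperature decreases, while remaining differentiable with respect to log q.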
def gaussian_nll_keepbatch(self, x, mean, ln_var, clip=True):
if clip:
clip_min = math.log(0.01)
clip_max = math.log(10)
ln_var = F.clip(ln_var, clip_min, clip_max)
x_prec = F.exp(-ln_var)
x_diff = x - mean
x_power = (x_diff * x_diff) * x_prec * 0.5
# print "nll"
# print cuda.cupy.amax(x.data), cuda.cupy.amin(x.data)
# print cuda.cupy.amax(ln_var.data), cuda.cupy.amin(ln_var.data)
# print cuda.cupy.amax(x_prec.data), cuda.cupy.amin(x_prec.data)
# print cuda.cupy.amax(x_power.data), cuda.cupy.amin(x_power.data)
return F.sum((math.log(2.0 * math.pi) + ln_var) * 0.5 + x_power, axis=1)
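# Per-sample negative log-likelihood of a diagonal Gaussian:
#   0.5 * sum_d [ log(2 * pi) + ln_var_d + (x_d - mean_d)^2 / var_d ].
# Quick standalone check at x = mean with unit variance in one dimension:
import math
print(0.5 * math.log(2 * math.pi))   # ~= 0.9189, the unit-Gaussian NLL at its mean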
def log_py(self, y):
xp = self.xp
n_types_of_label = y.data.shape[1]
# prior p(y) expecting that all classes are evenly distributed
constant = math.log(1.0 / n_types_of_label)
log_py = xp.full((y.data.shape[0],), constant, xp.float32)
return self.to_variable(log_py)
def log_pz(self, z):
log_pz = -0.5 * math.log(2.0 * math.pi) - 0.5 * z ** 2
return F.sum(log_pz, axis=1)
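# log N(z; 0, 1) = -0.5 * log(2 * pi) - 0.5 * z^2, summed over the latent
# dimensions; at z = 0 each dimension contributes about -0.9189.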
# compute lower bound using gumbel-softmax
def gaussian_nll_keepbatch(self, x, mean, ln_var, clip=True):
if clip:
clip_min = math.log(0.001)
clip_max = math.log(10)
ln_var = F.clip(ln_var, clip_min, clip_max)
x_prec = F.exp(-ln_var)
x_diff = x - mean
x_power = (x_diff * x_diff) * x_prec * 0.5
return F.sum((math.log(2.0 * math.pi) + ln_var) * 0.5 + x_power, axis=1)
def log_py(self, y, test=False):
xp = self.xp
num_types_of_label = y.data.shape[1]
# prior p(y) expecting that all classes are evenly distributed
constant = math.log(1.0 / num_types_of_label)
log_py = xp.full((y.data.shape[0],), constant, xp.float32)
return Variable(log_py)
# this will not be used