def evaluate():
"""evaluate the model while training"""
model.eval() # turn on the eval() switch to disable dropout
total_loss = 0
total_correct = 0
for batch, i in enumerate(range(0, len(data_val), args.batch_size)):
data, targets = package(data_val[i:min(len(data_val), i+args.batch_size)], volatile=True)
if args.cuda:
data = data.cuda()
targets = targets.cuda()
hidden = model.init_hidden(data.size(1))
output, attention = model.forward(data, hidden)
output_flat = output.view(data.size(1), -1)
total_loss += criterion(output_flat, targets).data
prediction = torch.max(output_flat, 1)[1]
total_correct += torch.sum((prediction == targets).float())
return total_loss[0] / (len(data_val) // args.batch_size), total_correct.data[0] / len(data_val)
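The accuracy bookkeeping above reduces to summing an element-wise comparison. A minimal, self-contained sketch of that step (values are made up, and it uses the current tensor API rather than the Variable/.data[0] idiom of the snippet above):

import torch

prediction = torch.tensor([1, 0, 2, 1])
targets = torch.tensor([1, 0, 1, 1])
correct = torch.sum((prediction == targets).float())   # tensor(3.)
accuracy = correct.item() / targets.numel()            # 0.75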
Python torch.sum() examples
Source: train.py, from the project Structured-Self-Attentive-Sentence-Embedding (author: ExplorerFreda).
def train_ae(self, train_X, optimizer, epochs, verbose=True):
N = train_X.data.size()[0]
    num_batches = N // self.batch_size  # integer division so range() below receives an int
for e in range(epochs):
agg_cost = 0.
for k in range(num_batches):
start, end = k * self.batch_size, (k + 1) * self.batch_size
bX = train_X[start:end]
optimizer.zero_grad()
Z = self.forward(bX)
Z = self.decode(Z)
loss = -torch.sum(bX * torch.log(Z) + (1.0 - bX) * torch.log(1.0 - Z), 1)
cost = torch.mean(loss)
cost.backward()
optimizer.step()
agg_cost += cost
agg_cost /= num_batches
if verbose:
print("Epoch:", e, "cost:", agg_cost.data[0])
def node_forward(self, inputs, child_c, child_h):
child_h_sum = torch.sum(child_h, dim=0, keepdim=True)
iou = self.ioux(inputs) + self.iouh(child_h_sum)
i, o, u = torch.split(iou, iou.size(1) // 3, dim=1)
i, o, u = F.sigmoid(i), F.sigmoid(o), F.tanh(u)
f = F.sigmoid(
self.fh(child_h) +
self.fx(inputs).repeat(len(child_h), 1)
)
fc = torch.mul(f, child_c)
c = torch.mul(i, u) + torch.sum(fc, dim=0, keepdim=True)
h = torch.mul(o, F.tanh(c))
return c, h
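node_forward implements a child-sum Tree-LSTM cell: the children's hidden states are collapsed with torch.sum along dim=0 before the gates are computed, and the forget-gated cell states are summed the same way. The shape bookkeeping in isolation (sizes are illustrative):

import torch

child_h = torch.randn(3, 150)                          # 3 children, hidden size 150
child_h_sum = torch.sum(child_h, dim=0, keepdim=True)  # shape (1, 150)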
def forward(self, input, target):
buffer = input.new()
buffer.resize_as_(input).copy_(input)
buffer[torch.eq(target, -1.)] = 0
output = buffer.sum()
buffer.fill_(self.margin).add_(-1, input)
buffer.cmax_(0)
buffer[torch.eq(target, 1.)] = 0
output += buffer.sum()
if self.size_average:
output = output / input.nelement()
self.save_for_backward(input, target)
return input.new((output,))
def updateOutput(self, input):
self._assertInput(input)
# set up buffer:
    if self.buff2 is None:
        self.buff2 = input[0].new()
self.buff2.resize_as_(input[1])
# compute output scores:
self.output.resize_(input[0].size(0), self.weight.size(0))
for k in range(self.weight.size(0)):
torch.mm(self.buff2, input[0], self.weight[k])
self.buff2.mul_(input[1])
torch.sum(self.output.narrow(1, k, 1), self.buff2, 1)
if self.bias:
self.output.add_(self.bias.view(1, self.bias.nelement()).expand_as(self.output))
return self.output
def sym_distance_matrix(A, B, eps=1e-18, self_similarity=False):
"""
Defines the symbolic matrix that contains the distances between the vectors of A and B
:param A: the first data matrix
:param B: the second data matrix
    :param self_similarity: zero the diagonal to improve stability (use when A and B are the same matrix)
    :param eps: the minimum distance between two vectors (set to a very small number to improve stability)
    :return: the (A.size(0), B.size(0)) matrix of pairwise Euclidean distances
"""
# Compute the squared distances
AA = torch.sum(A * A, 1).view(-1, 1)
BB = torch.sum(B * B, 1).view(1, -1)
AB = torch.mm(A, B.transpose(0, 1))
D = AA + BB - 2 * AB
    # Zero the diagonal
if self_similarity:
D = D.view(-1)
D[::B.size(0) + 1] = 0
D = D.view(A.size(0), B.size(0))
# Return the square root
D = torch.sqrt(torch.clamp(D, min=eps))
return D
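Assuming the function above, a quick check of the shapes and of the ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a·b expansion it is built on:

import torch

A = torch.randn(5, 16)
B = torch.randn(7, 16)
D = sym_distance_matrix(A, B)      # shape (5, 7)
ref = torch.norm(A[0] - B[0])      # plain Euclidean distance
# torch.allclose(D[0, 0], ref, atol=1e-4) should hold (up to the eps clamp)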
def test_forward_works_without_mask(self):
log_likelihood = self.crf(self.logits, self.tags).data[0]
# Now compute the log-likelihood manually
manual_log_likelihood = 0.0
# For each instance, manually compute the numerator
# (which is just the score for the logits and actual tags)
# and the denominator
# (which is the log-sum-exp of the scores for the logits across all possible tags)
for logits_i, tags_i in zip(self.logits, self.tags):
numerator = self.score(logits_i.data, tags_i.data)
all_scores = [self.score(logits_i.data, tags_j) for tags_j in itertools.product(range(5), repeat=3)]
denominator = math.log(sum(math.exp(score) for score in all_scores))
# And include them in the manual calculation.
manual_log_likelihood += numerator - denominator
# The manually computed log likelihood should equal the result of crf.forward.
assert manual_log_likelihood == approx(log_likelihood)
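The manual denominator in this test is a brute-force log-sum-exp over all 5**3 tag sequences. As a side note (not part of the original test), the same reduction over a single score vector can be written with torch.logsumexp in recent PyTorch releases:

import math
import torch

scores = torch.randn(10)
manual = math.log(sum(math.exp(s) for s in scores.tolist()))
builtin = torch.logsumexp(scores, dim=0).item()
# abs(manual - builtin) is zero up to floating-point error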
def test_contrastive_loss_value(self):
x0_val = Variable(self.x0)
x1_val = Variable(self.x1)
t_val = Variable(self.t)
tml = ContrastiveLoss(margin=self.margin)
loss = tml.forward(x0_val, x1_val, t_val)
self.assertEqual(loss.data.numpy().shape, (1, ))
self.assertEqual(loss.data.numpy().dtype, np.float32)
loss_value = float(loss.data.numpy())
# Compute expected value
loss_expect = 0
for i in range(self.x0.size()[0]):
x0d, x1d, td = self.x0[i], self.x1[i], self.t[i]
d = torch.sum(torch.pow(x0d - x1d, 2))
if td == 1: # similar pair
loss_expect += d
elif td == 0: # dissimilar pair
loss_expect += max(1 - np.sqrt(d), 0)**2
loss_expect /= 2.0 * self.t.size()[0]
print("expected %s got %s" % (loss_expect, loss_value))
self.assertAlmostEqual(loss_expect, loss_value, places=5)
def update_memories_with_extra_features_(self, memory_lengths, memories):
memory_lengths = memory_lengths.data
memories = memories.data
if self.extra_features_slots > 0:
num_nonempty_memories = memory_lengths.ne(0).sum()
updated_memories = memories.new(memories.numel() + num_nonempty_memories * self.extra_features_slots)
src_offset = 0
dst_offset = 0
for i in range(memory_lengths.size(0)):
for j in range(self.opt['mem_size']):
length = memory_lengths[i, j]
if length > 0:
if self.opt['time_features']:
updated_memories[dst_offset] = self.time_feature(j)
dst_offset += 1
updated_memories[dst_offset:dst_offset + length] = memories[src_offset:src_offset + length]
src_offset += length
dst_offset += length
memory_lengths += memory_lengths.ne(0).long() * self.extra_features_slots
memories.set_(updated_memories)
def get_accuracy(data_loader, classifier_fn, batch_size):
"""
compute the accuracy over the supervised training set or the testing set
"""
predictions, actuals = [], []
# use the appropriate data loader
for (xs, ys) in data_loader:
# use classification function to compute all predictions for each batch
xs, ys = Variable(xs), Variable(ys)
predictions.append(classifier_fn(xs))
actuals.append(ys)
# compute the number of accurate predictions
accurate_preds = 0
for pred, act in zip(predictions, actuals):
for i in range(pred.size(0)):
v = torch.sum(pred[i] == act[i])
accurate_preds += (v.data[0] == 10)
# calculate the accuracy between 0 and 1
accuracy = (accurate_preds * 1.0) / (len(predictions) * batch_size)
return accuracy
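The `v.data[0] == 10` check counts a prediction as correct only when all 10 positions of the one-hot vectors agree, i.e. when torch.sum of the element-wise equality equals the vector length. The same test in isolation (values are illustrative and use the current tensor API):

import torch

pred = torch.tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])
act  = torch.tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])
v = torch.sum(pred == act)       # tensor(10): number of matching positions
is_correct = (v.item() == 10)    # True only for an exact match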
def setUp(self):
# simple Gaussian-emission HMM
def model():
p_latent = pyro.param("p1", Variable(torch.Tensor([[0.7], [0.3]])))
p_obs = pyro.param("p2", Variable(torch.Tensor([[0.9], [0.1]])))
latents = [Variable(torch.ones(1, 1))]
observes = []
for t in range(self.model_steps):
latents.append(
pyro.sample("latent_{}".format(str(t)),
Bernoulli(torch.index_select(p_latent, 0, latents[-1].view(-1).long()))))
observes.append(
pyro.observe("observe_{}".format(str(t)),
Bernoulli(torch.index_select(p_obs, 0, latents[-1].view(-1).long())),
self.data[t]))
return torch.sum(torch.cat(latents))
self.model_steps = 3
self.data = [pyro.ones(1, 1) for _ in range(self.model_steps)]
self.model = model
def setUp(self):
# lognormal-normal model
# putting some of the parameters inside of a torch module to
# make sure that that functionality is ok (XXX: do this somewhere else in the future)
self.mu0 = Variable(torch.Tensor([1.0])) # normal prior hyperparameter
# normal prior hyperparameter
self.tau0 = Variable(torch.Tensor([1.0]))
# known precision for observation likelihood
self.tau = Variable(torch.Tensor([2.5]))
self.n_data = 2
self.data = Variable(torch.Tensor([[1.5], [2.2]])) # two observations
self.tau_n = self.tau0 + \
Variable(torch.Tensor([self.n_data])) * self.tau # posterior tau
mu_numerator = self.mu0 * self.tau0 + \
self.tau * torch.sum(torch.log(self.data))
self.mu_n = mu_numerator / self.tau_n # posterior mu
self.log_mu_n = torch.log(self.mu_n)
self.log_tau_n = torch.log(self.tau_n)
def _test_jacobian(self, input_dim, hidden_dim, multiplier):
jacobian = torch.zeros(input_dim, input_dim)
arn = AutoRegressiveNN(input_dim, hidden_dim, multiplier)
def nonzero(x):
return torch.sign(torch.abs(x))
for output_index in range(multiplier):
for j in range(input_dim):
for k in range(input_dim):
x = Variable(torch.randn(1, input_dim))
epsilon_vector = torch.zeros(1, input_dim)
epsilon_vector[0, j] = self.epsilon
delta = (arn(x + Variable(epsilon_vector)) - arn(x)) / self.epsilon
jacobian[j, k] = float(delta[0, k + output_index * input_dim].data.cpu().numpy()[0])
permutation = arn.get_permutation()
permuted_jacobian = jacobian.clone()
for j in range(input_dim):
for k in range(input_dim):
permuted_jacobian[j, k] = jacobian[permutation[j], permutation[k]]
lower_sum = torch.sum(torch.tril(nonzero(permuted_jacobian), diagonal=0))
self.assertTrue(lower_sum == float(0.0))
def setUp(self):
# lognormal-normal model
# putting some of the parameters inside of a torch module to
# make sure that that functionality is ok (XXX: do this somewhere else in the future)
self.mu0 = Variable(torch.Tensor([1.0])) # normal prior hyperparameter
# normal prior hyperparameter
self.tau0 = Variable(torch.Tensor([1.0]))
# known precision for observation likelihood
self.tau = Variable(torch.Tensor([2.5]))
self.n_data = 2
self.data = Variable(torch.Tensor([[1.5], [2.2]])) # two observations
self.tau_n = self.tau0 + \
Variable(torch.Tensor([self.n_data])) * self.tau # posterior tau
mu_numerator = self.mu0 * self.tau0 + \
self.tau * torch.sum(torch.log(self.data))
self.mu_n = mu_numerator / self.tau_n # posterior mu
self.log_mu_n = torch.log(self.mu_n)
self.log_tau_n = torch.log(self.tau_n)
self.verbose = True
def log_beta(t):
"""
Computes log Beta function.
    :param t: a 1-dimensional parameter vector, or a 2-dimensional batch of parameter vectors
:type t: torch.autograd.Variable of dimension 1 or 2
:rtype: torch.autograd.Variable of float (if t.dim() == 1) or torch.Tensor (if t.dim() == 2)
"""
assert t.dim() in (1, 2)
if t.dim() == 1:
numer = torch.sum(log_gamma(t))
denom = log_gamma(torch.sum(t))
else:
numer = torch.sum(log_gamma(t), 1)
denom = log_gamma(torch.sum(t, 1))
return numer - denom
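log_beta relies on a log_gamma helper defined elsewhere in the project. With torch.lgamma the same identity, log B(a, b) = log Γ(a) + log Γ(b) - log Γ(a + b), can be checked directly; this check is an illustration, not part of the original module:

import math
import torch

t = torch.tensor([2.0, 3.0])
val = (torch.sum(torch.lgamma(t)) - torch.lgamma(torch.sum(t))).item()
# B(2, 3) = 1/12, so val ≈ math.log(1 / 12) ≈ -2.4849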
def softmax(x, dim=-1):
"""
TODO: change to use the default pyTorch implementation when available
Source: https://discuss.pytorch.org/t/why-softmax-function-cant-specify-the-dimension-to-operate/2637
:param x: tensor
:param dim: Dimension to apply the softmax function to. The elements of the tensor in this
dimension must sum to 1.
:return: tensor having the same dimension as `x` rescaled along dim
"""
input_size = x.size()
trans_input = x.transpose(dim, len(input_size) - 1)
trans_size = trans_input.size()
input_2d = trans_input.contiguous().view(-1, trans_size[-1])
try:
soft_max_2d = F.softmax(input_2d, 1)
except TypeError:
# Support older pytorch 0.2 release.
soft_max_2d = F.softmax(input_2d)
soft_max_nd = soft_max_2d.view(*trans_size)
return soft_max_nd.transpose(dim, len(input_size) - 1)
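A quick sanity check of the helper above: after the call, the entries along `dim` sum to one. This assumes the softmax function defined above is in scope:

import torch

x = torch.randn(2, 3, 4)
y = softmax(x, dim=1)
torch.sum(y, dim=1)    # shape (2, 4), every entry ≈ 1.0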
def __call__(self, x, index=None):
output = self.pretrained_model(x)
if index is None:
index = np.argmax(output.data.cpu().numpy())
one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
one_hot[0][index] = 1
if self.cuda:
one_hot = Variable(torch.from_numpy(one_hot).cuda(), requires_grad=True)
else:
one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
one_hot = torch.sum(one_hot * output)
one_hot.backward(retain_variables=True)
grad = x.grad.data.cpu().numpy()
grad = grad[0, :, :, :]
return grad
def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
# classification loss
rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
rpn_label = rpn_data[0].view(-1)
rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
fg_cnt = torch.sum(rpn_label.data.ne(0))
rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)
# box loss
rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)
rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)
return rpn_cross_entropy, rpn_loss_box
def preProc2(x):
# Access the global variables
global P, expP, negExpP
P = P.type_as(x)
expP = expP.type_as(x)
negExpP = negExpP.type_as(x)
    # Output buffer initialised with zeros; entries are filled in below depending on which condition holds
z = Variable(torch.zeros(x.size())).type_as(x)
absX = torch.abs(x)
cond1 = torch.gt(absX, negExpP)
cond2 = torch.le(absX, negExpP)
if (torch.sum(cond1) > 0).data.all():
x1 = torch.sign(x[cond1])
z[cond1] = x1
if (torch.sum(cond2) > 0).data.all():
x2 = x[cond2]*expP
z[cond2] = x2
return z
def _forward_alg(self, feats):
# calculate in log domain
# feats is len(sentence) * tagset_size
# initialize alpha with a Tensor with values all equal to -10000.
init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)
init_alphas[0][self.tag_to_ix[START_TAG]] = 0.
forward_var = autograd.Variable(init_alphas)
if self.use_gpu:
forward_var = forward_var.cuda()
for feat in feats:
emit_score = feat.view(-1, 1)
tag_var = forward_var + self.transitions + emit_score
max_tag_var, _ = torch.max(tag_var, dim=1)
tag_var = tag_var - max_tag_var.view(-1, 1)
forward_var = max_tag_var + torch.log(torch.sum(torch.exp(tag_var), dim=1)).view(1, -1) # ).view(1, -1)
terminal_var = (forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]).view(1, -1)
alpha = log_sum_exp(terminal_var)
# Z(x)
return alpha
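The loop applies the max-shift trick so that torch.exp never overflows before torch.sum and torch.log are taken. In recent PyTorch releases torch.logsumexp performs the same reduction, which makes for a handy cross-check (illustrative only, not part of the original model):

import torch

tag_var = torch.randn(5, 5)
max_tag_var, _ = torch.max(tag_var, dim=1)
stable = max_tag_var + torch.log(torch.sum(torch.exp(tag_var - max_tag_var.view(-1, 1)), dim=1))
builtin = torch.logsumexp(tag_var, dim=1)
# torch.allclose(stable, builtin) -> True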
def forward(self, pos_u, pos_v, neg_u, neg_v):
losses = []
emb_u = []
for i in range(len(pos_u)):
emb_ui = self.u_embeddings(Variable(torch.LongTensor(pos_u[i])))
emb_u.append(np.sum(emb_ui.data.numpy(), axis=0).tolist())
emb_u = Variable(torch.FloatTensor(emb_u))
emb_v = self.v_embeddings(Variable(torch.LongTensor(pos_v)))
score = torch.mul(emb_u, emb_v)
score = torch.sum(score, dim=1)
score = F.logsigmoid(score)
losses.append(sum(score))
neg_emb_u = []
for i in range(len(neg_u)):
neg_emb_ui = self.u_embeddings(Variable(torch.LongTensor(neg_u[i])))
neg_emb_u.append(np.sum(neg_emb_ui.data.numpy(), axis=0).tolist())
neg_emb_u = Variable(torch.FloatTensor(neg_emb_u))
neg_emb_v = self.v_embeddings(Variable(torch.LongTensor(neg_v)))
neg_score = torch.mul(neg_emb_u, neg_emb_v)
neg_score = torch.sum(neg_score, dim=1)
neg_score = F.logsigmoid(-1 * neg_score)
losses.append(sum(neg_score))
return -1 * sum(losses)
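The Python loops above sum context embeddings by round-tripping through NumPy. The same bag-of-embeddings reduction can stay on the torch side; this is a hedged alternative for a single context window, not the original code path:

import torch
import torch.nn as nn

emb = nn.Embedding(100, 8)
idx = torch.tensor([3, 17, 42])          # word ids of one context window
bag = torch.sum(emb(idx), dim=0)         # shape (8,), same role as np.sum(..., axis=0)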
def _forward(self, batch):
_, questions, passages, answers, _ = batch
batch_num = questions.tensor.size(0)
questions.variable()
passages.variable()
begin_, end_ = self.model(questions, passages) # batch x seq
assert begin_.size(0) == batch_num
answers = Variable(answers)
if torch.cuda.is_available():
answers = answers.cuda()
begin, end = answers[:, 0], answers[:, 1]
loss = self.loss_fn(begin_, begin) + self.loss_fn(end_, end)
_, pred_begin = torch.max(begin_, 1)
_, pred_end = torch.max(end_, 1)
exact_correct_num = torch.sum(
(pred_begin == begin) * (pred_end == end))
em = exact_correct_num.data[0] / batch_num
return loss, em
def build_loss_objectiveness(self, region_objectiveness, targets):
loss_objectiveness = F.cross_entropy(region_objectiveness, targets)
maxv, predict = region_objectiveness.data.max(1)
labels = targets.squeeze()
fg_cnt = torch.sum(labels.data.ne(0))
bg_cnt = labels.data.numel() - fg_cnt
if fg_cnt > 0:
self.tp_reg = torch.sum(predict[:fg_cnt].eq(labels.data[:fg_cnt]))
else:
self.tp_reg = 0.
if bg_cnt > 0:
self.tf_reg = torch.sum(predict[fg_cnt:].eq(labels.data[fg_cnt:]))
else:
        self.tf_reg = 0.
self.fg_cnt_reg = fg_cnt
self.bg_cnt_reg = bg_cnt
return loss_objectiveness
Source: attack_carlini_wagner_l2.py, from the project pytorch-nips2017-attack-example (author: rwightman).
def _loss(self, output, target, dist, scale_const):
# compute the probability of the label class versus the maximum other
real = (target * output).sum(1)
other = ((1. - target) * output - target * 10000.).max(1)[0]
if self.targeted:
# if targeted, optimize for making the other class most likely
loss1 = torch.clamp(other - real + self.confidence, min=0.) # equiv to max(..., 0.)
else:
# if non-targeted, optimize for making this class least likely.
loss1 = torch.clamp(real - other + self.confidence, min=0.) # equiv to max(..., 0.)
loss1 = torch.sum(scale_const * loss1)
loss2 = dist.sum()
loss = loss1 + loss2
return loss
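The hinge term is max(real - other + confidence, 0) written with torch.clamp, then reduced with torch.sum after the per-example scaling. A toy walk-through with made-up logits and a one-hot target (not tied to the original attack code):

import torch

output = torch.tensor([[2.0, 0.5, -1.0]])    # logits for one example
target = torch.tensor([[1.0, 0.0, 0.0]])     # one-hot label
real = (target * output).sum(1)                               # tensor([2.0])
other = ((1. - target) * output - target * 10000.).max(1)[0]  # tensor([0.5])
loss1 = torch.clamp(real - other + 0., min=0.)                # tensor([1.5])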
def forward(self, input):
batch_size = input.size(0)
num_channels = input.size(1)
h = input.size(2)
w = input.size(3)
n = h * w # number of regions
kmax = self.get_positive_k(self.kmax, n)
kmin = self.get_positive_k(self.kmin, n)
sorted, indices = input.new(), input.new().long()
torch.sort(input.view(batch_size, num_channels, n), dim=2, descending=True, out=(sorted, indices))
self.indices_max = indices.narrow(2, 0, kmax)
output = sorted.narrow(2, 0, kmax).sum(2).div_(kmax)
    if kmin > 0 and self.alpha != 0:
self.indices_min = indices.narrow(2, n - kmin, kmin)
output.add_(sorted.narrow(2, n - kmin, kmin).sum(2).mul_(self.alpha / kmin)).div_(2)
self.save_for_backward(input)
return output.view(batch_size, num_channels)
def KLDGaussian(Q, N, eps=1e-8):
"""KL Divergence between two Gaussians
Assuming Q ~ N(mu0, A\sigma_0A') where A = I + vr^{T}
and N ~ N(mu1, \sigma_1)
"""
sum = lambda x: torch.sum(x, dim=1)
k = float(Q.mu.size()[1]) # dimension of distribution
mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu
s02, s12 = (Q.sigma).pow(2) + eps, (N.sigma).pow(2) + eps
a = sum(s02 * (1. + 2. * v * r) / s12) + sum(v.pow(2) / s12) * sum(r.pow(2) * s02) # trace term
b = sum((mu1 - mu0).pow(2) / s12) # difference-of-means term
c = 2. * (sum(N.logsigma - Q.logsigma) - torch.log(1. + sum(v * r) + eps)) # ratio-of-determinants term.
#
# print('trace: %s' % a)
# print('mu_diff: %s' % b)
# print('k: %s' % k)
# print('det: %s' % c)
return 0.5 * (a + b - k + c)
def compute_loss(x_dec, x_next_pred_dec, x, x_next,
Qz, Qz_next_pred,
Qz_next):
# Reconstruction losses
if False:
x_reconst_loss = (x_dec - x_next).pow(2).sum(dim=1)
x_next_reconst_loss = (x_next_pred_dec - x_next).pow(2).sum(dim=1)
else:
x_reconst_loss = -binary_crossentropy(x, x_dec).sum(dim=1)
x_next_reconst_loss = -binary_crossentropy(x_next, x_next_pred_dec).sum(dim=1)
logvar = Qz.logsigma.mul(2)
KLD_element = Qz.mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
KLD = torch.sum(KLD_element, dim=1).mul(-0.5)
# ELBO
bound_loss = x_reconst_loss.add(x_next_reconst_loss).add(KLD)
kl = KLDGaussian(Qz_next_pred, Qz_next)
return bound_loss.mean(), kl.mean()
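KLD_element spells out the closed-form KL divergence between N(mu, sigma^2) and the standard normal, summed over latent dimensions by torch.sum before the -0.5 factor. The same quantity written in one line (shapes are illustrative):

import torch

mu = torch.randn(4, 8)
logvar = torch.randn(4, 8)
KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)   # shape (4,)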
def level_curves(fname, npoints = 200, smoothing = 10, level = 0.5) :
"Loads regularly sampled curves from a .PNG image."
# Find the contour lines
img = misc.imread(fname, flatten = True) # Grayscale
img = (img.T[:, ::-1]) / 255.
img = gaussian_filter(img, smoothing, mode='nearest')
lines = find_contours(img, level)
# Compute the sampling ratio for every contour line
lengths = np.array( [arclength(line) for line in lines] )
points_per_line = np.ceil( npoints * lengths / np.sum(lengths) )
# Interpolate accordingly
points = [] ; connec = [] ; index_offset = 0
for ppl, line in zip(points_per_line, lines) :
(p, c) = resample(line, ppl)
points.append(p)
connec.append(c + index_offset)
index_offset += len(p)
size = np.maximum(img.shape[0], img.shape[1])
points = np.vstack(points) / size
connec = np.vstack(connec)
return Curve(points, connec)
# Pyplot Output =================================================================================
def _kernel_matching(q1_x, q1_mu, xt_x, xt_mu, radius) :
"""
Given two measures q1 and xt represented by locations/weights arrays,
outputs a kernel-fidelity term and an empty 'info' array.
"""
K_qq, K_qx, K_xx = _cross_kernels(q1_x, xt_x, radius)
cost = .5 * ( torch.sum(K_qq * torch.ger(q1_mu,q1_mu)) \
+ torch.sum(K_xx * torch.ger(xt_mu,xt_mu)) \
-2*torch.sum(K_qx * torch.ger(q1_mu,xt_mu)) )
# Info = the 2D graph of the blurred distance function
# Increase res if you want to get nice smooth pictures...
res = 10 ; ticks = np.linspace( 0, 1, res + 1)[:-1] + 1/(2*res)
X,Y = np.meshgrid( ticks, ticks )
points = Variable(torch.from_numpy(np.vstack( (X.ravel(), Y.ravel()) ).T).type(dtype), requires_grad=False)
info = _k( points, q1_x , radius ) @ q1_mu \
- _k( points, xt_x , radius ) @ xt_mu
return [cost , info.view( (res,res) ) ]
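Each term of the cost is a quadratic form: torch.sum(K * torch.ger(a, b)) equals a^T K b, since torch.ger builds the outer product (newer releases also expose it as torch.outer). A minimal check, independent of the kernel helpers used above:

import torch

a = torch.randn(3)
b = torch.randn(4)
K = torch.randn(3, 4)
lhs = torch.sum(K * torch.ger(a, b))
rhs = a @ K @ b
# torch.allclose(lhs, rhs) -> True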