def KLDGaussian(Q, N, eps=1e-8):
    r"""Closed-form KL divergence KL(Q || N) between two Gaussians.

    Q is treated as N(mu0, A S0 A^T) with A = I + v r^T, where S0 is the
    diagonal matrix of ``Q.sigma ** 2``; N is treated as N(mu1, S1) with S1
    the diagonal matrix of ``N.sigma ** 2``.

    Args:
        Q: object exposing ``mu``, ``v``, ``r``, ``sigma`` and ``logsigma``.
        N: object exposing ``mu``, ``sigma`` and ``logsigma``.
        eps: small constant added to both variances and to the argument of
            the logarithm in the determinant term.

    Returns:
        Tensor holding one divergence value per row; every reduction in
        this function runs over dim 1.
    """
    def row_sum(t):
        # All reductions are taken over dim 1.
        return torch.sum(t, dim=1)

    # Size of dim 1 of the mean, used as the dimensionality constant.
    n_dims = float(Q.mu.size()[1])
    mean_q, mean_n = Q.mu, N.mu
    v, r = Q.v, Q.r
    var_q = (Q.sigma).pow(2) + eps
    var_n = (N.sigma).pow(2) + eps

    # tr(S1^-1 A S0 A^T), expanded for the rank-one perturbation A = I + v r^T.
    trace_term = (
        row_sum(var_q * (1. + 2. * v * r) / var_n)
        + row_sum(v.pow(2) / var_n) * row_sum(r.pow(2) * var_q)
    )
    # (mu1 - mu0)^T S1^-1 (mu1 - mu0)
    mean_term = row_sum((mean_n - mean_q).pow(2) / var_n)
    # log(det S1 / det(A S0 A^T)); det(A) = 1 + v^T r.
    log_det_term = 2. * (
        row_sum(N.logsigma - Q.logsigma)
        - torch.log(1. + row_sum(v * r) + eps)
    )
    return 0.5 * (trace_term + mean_term - n_dims + log_det_term)
# Example source code for Python log()
def _boxes2delta(self, box, anchor):
"""
box: (x_min, y_min, x_max, y_max)
anchor: (cx, cy, w, h)
"""
# change (x_min, y_min, x_max, y_max) to (cx, cy, w, h)
box_wh = box.clone()
box_wh[:2] = (box[:2] + box[2:]) / 2
box_wh[2:] = box[2:] - box[:2]
box_wh[0::2] *= self.W
box_wh[1::2] *= self.H
# calc (dcx, dcy, dw, dh)
box_delta = box.clone().fill_(0)
box_delta[:2] = box_wh[:2] - anchor[:2]
box_delta[2:] = torch.log(box_wh[2:]/anchor[2:])
return box_delta
def encode(matched, priors, variances):
    """Turn matched ground-truth boxes into regression targets w.r.t. priors.

    Args:
        matched: (tensor) ground-truth box for each prior, point form
            (x_min, y_min, x_max, y_max). Shape: [num_priors, 4].
        priors: (tensor) prior boxes in centre-offset form (cx, cy, w, h).
            Shape: [num_priors, 4].
        variances: (list[float]) ``variances[0]`` scales the centre offsets,
            ``variances[1]`` scales the log size ratios.
    Return:
        (tensor) encoded boxes, Shape: [num_priors, 4].
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]
    top_left = matched[:, :2]
    bottom_right = matched[:, 2:]

    # Centre displacement, normalised by the prior size and the variance.
    center_offset = (top_left + bottom_right) / 2 - prior_centers
    center_offset = center_offset / (variances[0] * prior_sizes)

    # Log of the ground-truth/prior size ratio, scaled by the variance.
    size_ratio = (bottom_right - top_left) / prior_sizes
    log_size = torch.log(size_ratio) / variances[1]

    # Targets laid out as [dcx, dcy, dw, dh] per prior.
    return torch.cat([center_offset, log_size], 1)
# Adapted from https://github.com/Hakuyume/chainer-ssd
def poisson_loss(observed_ratings, predicted_ratings):
    """
    Poisson loss.

    Parameters
    ----------
    observed_ratings: tensor
        Tensor containing observed ratings. It is passed to
        ``assert_no_grad`` before the loss is computed.
    predicted_ratings: tensor
        Tensor containing rating predictions; their logarithm is taken.

    Returns
    -------
    loss
        Mean over all elements of
        ``predicted - observed * log(predicted)``.
    """
    assert_no_grad(observed_ratings)
    log_predictions = torch.log(predicted_ratings)
    elementwise = predicted_ratings - observed_ratings * log_predictions
    return elementwise.mean()
def backward(self, grad_output):
    """Backward pass: gradient with respect to ``z``, times ``grad_output``.

    Entries with ``z < -1`` take their gradient from the ratio
    ``self.denominator / self.numerator``; all other entries use
    ``exp(-z**2 / 2 - log_phi_z + log(0.5))``. Both are scaled by
    ``sqrt(2 / pi)``.

    NOTE(review): ``self.numerator`` and ``self.denominator`` are presumably
    stored by the forward pass and sized to the ``z < -1`` entries -- confirm
    against the forward implementation.
    """
    z, log_phi_z = self.saved_tensors
    scale = math.sqrt(2 / math.pi)

    # Zero-filled buffer with the same type and shape as z.
    grad = z.new().resize_as_(z).zero_()

    small = z.lt(-1)
    # Complement of the mask, obtained arithmetically.
    not_small = 1 - small

    if small.sum() > 0:
        ratio = self.denominator.div(self.numerator)
        grad[small] = torch.abs(ratio).mul(scale)

    exponent = (
        z[not_small]
        .pow(2)
        .div(-2)
        .sub(log_phi_z[not_small])
        .add(math.log(0.5))
    )
    grad[not_small] = torch.exp(exponent).mul(scale)

    return grad.mul(grad_output)
def logsumexp(x, dim=None):
    """Numerically stable log(sum(exp(x))).

    The maximum is subtracted before exponentiating so that large inputs do
    not overflow, and added back after the logarithm.

    Args:
        x: A pytorch tensor (any dimension will do)
        dim: int or None, over which to perform the summation. `None`, the
            default, performs over all axes.

    Returns: The result of the log(sum(exp(...))) operation. With
        ``dim=None`` this is a scalar (0-dim) tensor; otherwise ``dim`` is
        removed from the shape of ``x``.
    """
    if dim is None:
        x_max = x.max()
        # Stay inside torch (rather than numpy.log) so the result remains
        # differentiable and works for tensors that are not on the CPU.
        return x_max + torch.log(torch.exp(x - x_max).sum())

    # keepdim=True lets the maximum broadcast against x for the shift.
    x_max, _ = x.max(dim, keepdim=True)
    shifted_sum = torch.exp(x - x_max).sum(dim)
    # Drop the kept dimension so it lines up with the reduced sum.
    return x_max.squeeze(dim) + torch.log(shifted_sum)
def bbox_transform(ex_rois, gt_rois):
    """Compute box regression targets mapping ``ex_rois`` onto ``gt_rois``.

    Both inputs are indexed as [:, 0..3] = (x1, y1, x2, y2). Widths and
    heights use the inclusive ``+ 1.0`` convention.

    Returns a tensor whose columns are (dx, dy, dw, dh): centre offsets
    normalised by the example box size, and log size ratios.
    """
    def center_size(rois):
        # (x1, y1, x2, y2) columns -> (ctr_x, ctr_y, width, height)
        width = rois[:, 2] - rois[:, 0] + 1.0
        height = rois[:, 3] - rois[:, 1] + 1.0
        ctr_x = rois[:, 0] + 0.5 * width
        ctr_y = rois[:, 1] + 0.5 * height
        return ctr_x, ctr_y, width, height

    ex_x, ex_y, ex_w, ex_h = center_size(ex_rois)
    gt_x, gt_y, gt_w, gt_h = center_size(gt_rois)

    deltas = (
        (gt_x - ex_x) / ex_w,
        (gt_y - ex_y) / ex_h,
        torch.log(gt_w / ex_w),
        torch.log(gt_h / ex_h),
    )
    return torch.stack(deltas, 1)
def forward(self, y, weights, mean, std):
    """
    Negative log-likelihood of ``y`` under a Gaussian mixture.

    Each component density is evaluated at ``y`` (expanded to the shape of
    ``mean``), the densities are combined with ``weights`` by summing over
    dim 1, and the negative mean log of that mixture density is returned.

    :param y: Target values; must be expandable to the shape of ``mean``.
    :param weights: Predicted mixture weights.
    :param mean: Predicted mixture means.
    :param std: Predicted mixture standard deviations.
    :return: The negative mean log-likelihood.
    """
    inv_sqrt_two_pi = 1.0 / ((2.0 * math.pi) ** 0.5)

    # Standardised residual (y - mean) / std for every component.
    standardized = (y.expand_as(mean) - mean) * torch.reciprocal(std)
    # Gaussian density of each component at y.
    density = inv_sqrt_two_pi * torch.reciprocal(std) * torch.exp(-0.5 * standardized ** 2)

    mixture_likelihood = torch.sum(weights * density, dim=1)
    return -torch.mean(torch.log(mixture_likelihood))
def experiments_randseeds(opt, start=0, end=5):
    """Run ``train`` once per random seed and append the results to a CSV.

    For every index in ``range(start, end)`` the matching entry of a fixed
    seed list is stored in ``opt.manualSeed``, ``opt.experiment`` is pointed
    at a per-seed sub-directory of the original experiment directory
    (created if missing), and ``train`` is called. Each run appends a
    ``seed,logprob`` line to a CSV file in the original experiment directory
    and prints the best configuration seen so far.

    A failing run has its traceback printed and the loop moves on to the
    next seed. ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.

    Args:
        opt: options object; ``experiment``, ``dataset``, ``D``, ``A``, ``H``
            and ``critic_last_layer`` are read, ``manualSeed`` and
            ``experiment`` are overwritten (and not restored afterwards).
        start: first index into the seed list (inclusive).
        end: last index into the seed list (exclusive); the list has five
            entries.
    """
    random_seeds = [1, 101, 512, 1001, 10001]
    original_exp = opt.experiment
    file_name = '{0}_{1}_{2}_{3}_{4}_experiments.csv'.format(
        opt.dataset, opt.D, opt.A, opt.H, opt.critic_last_layer)
    csv_file = os.path.join(opt.experiment, file_name)
    with open(csv_file, 'a') as out:
        # None (rather than 0) marks "no result yet", so a genuine
        # log-probability of exactly 0 is not mistaken for the sentinel.
        max_logprob = None
        best_config = ''
        for i in range(start, end):
            rand_seed = random_seeds[i]
            opt.manualSeed = rand_seed
            try:
                run_name = '{0}_{1}_{2}_{3}_{4}_{5}'.format(
                    opt.dataset, opt.D, opt.A, opt.H, opt.manualSeed,
                    opt.critic_last_layer)
                opt.experiment = os.path.join(original_exp, run_name)
                # exist_ok avoids the check-then-create race.
                os.makedirs(opt.experiment, exist_ok=True)
                log_file_path = os.path.join(
                    opt.experiment, run_name + '_experiments.log')
                logprob = train(opt=opt, log_file_path=log_file_path)
                config = '{0},{1}\n'.format(rand_seed, logprob)
                if max_logprob is None or logprob > max_logprob:
                    max_logprob = logprob
                    best_config = config
                out.write(config)
                out.flush()
                print('best %s ' % best_config)
            except Exception:
                # Best effort: report the failure and continue with the
                # next seed, but let Ctrl-C / interpreter exit propagate.
                traceback.print_exc()
def accumulate_gradient(self, batch_sz, states, actions, rewards,
                        next_states, mask):
    """ Back-propagate the cross-entropy between the predicted return
        distribution of the taken actions and the target distribution.

        The predicted distribution comes from ``self.policy(states)``,
        indexed by ``actions``; the target comes from
        ``self._get_categorical(next_states, rewards, mask)``. Nothing is
        returned: the call only runs ``backward()`` on the loss.
    """
    states = Variable(states)
    actions = Variable(actions)
    next_states = Variable(next_states, volatile=True)

    # Predicted probabilities for every action, then pick the taken one.
    all_probs = self.policy(states)
    action_index = actions.view(batch_sz, 1, 1)
    gather_index = action_index.expand(batch_sz, 1, self.atoms_no)
    qa_probs = all_probs.gather(1, gather_index).squeeze()

    # Target distribution computed from the next states.
    target_qa_probs = self._get_categorical(next_states, rewards, mask)

    # Clamp the raw values in place so the logarithm stays finite
    # (avoids nans in the loss).
    qa_probs.data.clamp_(0.01, 0.99)
    cross_entropy = - torch.sum(target_qa_probs * torch.log(qa_probs))

    # Accumulate gradients
    cross_entropy.backward()
def logsumexp(x, axis=None, keepdims=False):
    """Build and apply a log(sum(exp(x))) op through ``get_op``.

    Two callables are handed to ``get_op``: one computing the value with
    torch and one reporting the output shape. ``axis`` and ``keepdims`` are
    forwarded as the op's arguments.
    """
    def _logsumexp(x, axis=axis, keepdims=keepdims):
        # Plain log-sum-exp over `axis`; squeezed unless keepdims is set.
        y = torch.log(torch.sum(torch.exp(x), axis))
        if keepdims:
            return y
        return torch.squeeze(y, axis)

    def _compute_output_shape(x, axis=axis, keepdims=keepdims):
        # No axis -> empty shape; otherwise collapse or drop `axis`.
        if axis is None:
            return ()
        dims = list(_get_shape(x))
        if keepdims:
            dims[axis] = 1
        else:
            del dims[axis]
        return tuple(dims)

    op = get_op(_logsumexp, output_shape=_compute_output_shape, arguments=[axis, keepdims])
    return op(x)
def compute_loss(self, input, e, b, clusters, it=0):
    """Accumulate a per-sample loss over ``clusters // 2`` cluster pairs.

    For each pair index ``cl`` the method calls ``self.compute_diameter``
    to obtain ``(L, m1, m2)``, builds a mask selecting the entries whose
    ``e / 2`` (cast with ``dtype_l``) equals ``cl``, and adds
    ``L * sum(log_probs) / n`` to the running loss, where ``n`` is the
    number of selected entries per sample (replaced by 1 when it is 0).

    Returns:
        (Loss, Ls): both accumulators after ``mean(0)``; ``Ls`` is the
        accumulated ``L`` alone.

    NOTE(review): the exact shapes of ``e``, ``b``, ``m1`` and ``m2`` are
    not visible here -- presumably (batch, n_points); confirm with callers.
    """
    total_loss = Variable(torch.zeros((self.batch_size))).type(dtype)
    total_diameter = Variable(torch.zeros((self.batch_size))).type(dtype)

    for cl in range(clusters // 2):
        L, m1, m2 = self.compute_diameter(input, e, cl, it=it)
        # 1 where an entry falls in pair `cl`, 0 elsewhere.
        mask = ((e / 2).type(dtype_l) == cl).type(dtype)
        count = mask.sum(1).squeeze()
        # Guard against division by zero for pairs with no members.
        count += (count == 0).type(dtype)
        # Entries outside the mask get +1 inside the log; 1e-8 keeps the
        # argument strictly positive.
        log_probs = torch.log((1 - b) * m1 + b * m2 + (1 - mask) + 1e-8)
        total_loss += L * log_probs.sum(1) / count
        total_diameter += L

    total_diameter = total_diameter.mean(0)
    total_loss = total_loss.mean(0)
    return total_loss, total_diameter
###########################################################################
# Split Phase #
###########################################################################
def logaddexp(x1: T.FloatTensor, x2: T.FloatTensor) -> T.FloatTensor:
    """
    Elementwise log(exp(x1) + exp(x2)).

    The larger argument is factored out, so only the exponential of a
    non-positive difference is ever taken:

        log(exp(a) + exp(b)) = max(a, b) + log(1 + exp(-|a - b|))

    Args:
        x1: A tensor.
        x2: A tensor.

    Returns:
        tensor: Elementwise logaddexp.

    """
    larger = torch.max(x1, x2)
    # The smaller of the two signed differences, i.e. -|x1 - x2|.
    negative_gap = torch.min(x2 - x1, x1 - x2)
    return larger + torch.log1p(exp(negative_gap))
def cross_entropy_loss(self, x, y):
    '''Per-sample cross entropy loss (no averaging across samples).

    Computed as log-sum-exp of the scores minus the score of the target
    class. The global maximum of ``x`` is subtracted before exponentiating
    and added back afterwards.

    Args:
      x: (tensor) scores, sized [N,D].
      y: (tensor) target class indices, sized [N,].

    Return:
      (tensor) cross entropy loss, sized [N,1].
    '''
    # A single scalar shift taken from the raw data (not a per-row max).
    shift = x.data.max()
    exp_sum = torch.sum(torch.exp(x - shift), 1)
    log_sum_exp = torch.log(exp_sum) + shift

    # Score assigned to the target class of each sample, as a column.
    target_scores = x.gather(1, y.view(-1, 1))
    return log_sum_exp.view(-1, 1) - target_scores
def bbox_transform(ex_rois, gt_rois):
    """Regression targets that move each box in ``ex_rois`` onto ``gt_rois``.

    Inputs are read column-wise as (x1, y1, x2, y2); sizes follow the
    inclusive ``+ 1.0`` convention. The result stacks (dx, dy, dw, dh)
    along dim 1: size-normalised centre shifts and log size ratios.
    """
    # Example (source) boxes: size and centre.
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    # Ground-truth boxes: size and centre.
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    shift_x = (gt_cx - ex_cx) / ex_w
    shift_y = (gt_cy - ex_cy) / ex_h
    log_scale_w = torch.log(gt_w / ex_w)
    log_scale_h = torch.log(gt_h / ex_h)

    return torch.stack([shift_x, shift_y, log_scale_w, log_scale_h], 1)
def logp(x, mean, std):
    """Return ``-(0.5 * ((x - mean) / std)**2 + 0.5 * LOG2PI + log(std))``.

    NOTE(review): this is the Gaussian log-density provided ``LOG2PI``
    holds log(2 * pi) -- the constant is defined elsewhere; confirm.
    """
    standardized = (x - mean) / (std)
    negative_log_density = 0.5 * standardized**2 + 0.5 * LOG2PI + th.log(std)
    return -negative_log_density
def EntropicConfusion(features):
    """Sum of ``features * log(features)`` divided by the batch size.

    The batch size is the size of dim 0 of ``features``; the sum runs over
    every element.
    """
    batch_size = features.size(0)
    weighted_logs = features * torch.log(features)
    return weighted_logs.sum() * (1.0 / batch_size)
def forward(self, output, target):
    """Balanced focal loss built on top of the cross entropy.

    Focal loss is ``-alpha * (1 - p_t) ** gamma * log(p_t)``, where ``p_t``
    is the probability assigned to the target class. The cross entropy is
    ``-log(p_t)``, so ``log(p_t)`` is its negation and ``p_t`` is
    ``exp(-cross_entropy)``; the cross-entropy value itself is not a
    probability and must not be used in place of ``p_t``.

    Args:
        output: class scores, passed unchanged to ``F.cross_entropy``.
        target: target class indices, passed unchanged to
            ``F.cross_entropy``.

    Returns:
        ``self.balance_param`` times the focal loss, with
        ``self.focusing_param`` as the exponent gamma. ``F.cross_entropy``
        is called with its default reduction, so the modulation is applied
        to the reduced (batch-level) value and the result is a scalar.
    """
    log_pt = -F.cross_entropy(output, target)
    pt = torch.exp(log_pt)
    # Down-weight well-classified inputs (pt close to 1).
    focal_loss = -((1 - pt) ** self.focusing_param) * log_pt
    balanced_focal_loss = self.balance_param * focal_loss
    return balanced_focal_loss
def pretrain(self, x, pt_epochs, verbose=True):
    """Greedy layer-wise pre-training of the stacked autoencoders.

    The autoencoders named in ``self.autoencoders_ref`` are trained one at
    a time with SGD (learning rate ``self.pre_lr``). The first one is
    trained on ``x``; each later one is trained on the noise-free encoding
    produced by the autoencoder before it. The objective is the binary
    cross-entropy between a batch and its reconstruction from a noisy
    encoding.

    Only ``n // self.batch_size`` full batches are used; samples beyond the
    last full batch are neither encoded nor trained on.

    Args:
        x: input data; ``x.data.size()[0]`` is taken as the sample count.
        pt_epochs: number of epochs for each autoencoder.
        verbose: when true, print the averaged cost after every epoch.
    """
    n = x.data.size()[0]
    # Floor division: range() below needs an int, and `/` yields a float
    # on Python 3.
    num_batches = n // self.batch_size
    t = x

    # Pre-train 1 autoencoder at a time
    for i, ae_re in enumerate(self.autoencoders_ref):
        # Get the current autoencoder
        ae = getattr(self.sequential, ae_re)

        # Getting encoded output from the previous autoencoder
        if i > 0:
            # Set the requires_grad to False so that backprop doesn't
            # travel all the way back to the previous autoencoder
            temp = Variable(torch.FloatTensor(n, ae.d_in), requires_grad=False)
            # The previous autoencoder is the same for every batch.
            prev_ae = getattr(self.sequential, self.autoencoders_ref[i - 1])
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                temp.data[start:end] = prev_ae.encode(t[start:end], add_noise=False).data
            t = temp

        optimizer = SGD(ae.parameters(), lr=self.pre_lr)

        # Pre-training
        print("Pre-training Autoencoder:", i)
        for ep in range(pt_epochs):
            agg_cost = 0.
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bt = t[start:end]
                optimizer.zero_grad()
                # Encode with noise, then reconstruct.
                z = ae.encode(bt, add_noise=True)
                z = ae.decode(z)
                # Binary cross-entropy per sample, summed over dim 1.
                loss = -torch.sum(bt * torch.log(z) + (1.0 - bt) * torch.log(1.0 - z), 1)
                cost = torch.mean(loss)
                cost.backward()
                optimizer.step()
                agg_cost += cost
            agg_cost /= num_batches
            if verbose:
                print("Pre-training Autoencoder:", i, "Epoch:", ep, "Cost:", agg_cost.data[0])
def test_log(self):
    """Check ``torch.log`` against a scalar reference via ``self._testMath``.

    The reference is ``math.log`` for positive inputs and NaN otherwise.
    """
    def reference_log(value):
        if value > 0:
            return math.log(value)
        return float('nan')

    self._testMath(torch.log, reference_log)