def forward(self, inputs):
# set up batch size
batch_size = inputs.size(0)
# initialize the hidden and cell states with zeros
hidden = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
cell = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
hidden_cell = (hidden, cell)
# recurrent neural networks
outputs, _ = self.rnn(inputs, hidden_cell)
outputs = outputs.contiguous().view(-1, self.hidden_size * 2)
# compute classifications by outputs
outputs = self.classifier(outputs)
outputs = F.softmax(outputs, dim=1)
outputs = outputs.view(batch_size, -1, self.num_classes)
return outputs
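# A minimal, self-contained sketch (hypothetical sizes, not the original model)
# of the reshape-then-softmax pattern above: flatten (batch, seq, 2*hidden) to
# 2-D, classify, take a softmax over classes, then restore the sequence axis.
import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, seq_len, hidden_size, num_classes = 4, 7, 16, 5
rnn_out = torch.randn(batch_size, seq_len, hidden_size * 2)  # stand-in for the BiRNN output
classifier = nn.Linear(hidden_size * 2, num_classes)

flat = rnn_out.contiguous().view(-1, hidden_size * 2)        # (batch*seq, 2*hidden)
probs = F.softmax(classifier(flat), dim=1)                   # normalize over the class dim
probs = probs.view(batch_size, -1, num_classes)              # (batch, seq, num_classes)
assert torch.allclose(probs.sum(dim=2), torch.ones(batch_size, seq_len))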
def forward(self, hidden, encoder_outputs):
# hidden.size() = (B, H), encoder_outputs.size() = (B, S, H)
batch_size, encoder_outputs_len, _ = encoder_outputs.size()
# Create variable to store attention energies
# attn_energies.size() = (B, S)
attn_energies = Variable(torch.zeros((batch_size, encoder_outputs_len))) # B x S
if Config.use_cuda: attn_energies = attn_energies.cuda()
# Calculate energies for each encoder output
# attn_energies.size() = (B, S)
for i in range(encoder_outputs_len):
attn_energies[:, i] = self.score(hidden, encoder_outputs[:, i])
# print attn_energies[:, i]
# Normalize energies to weights in range 0 to 1
return F.softmax(attn_energies)
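# The loop above scores one encoder position at a time. If score() were a plain
# dot product (an assumption -- the original score() is not shown here), the same
# (B, S) energies could be computed in a single batched matmul:
import torch
import torch.nn.functional as F

B, S, H = 2, 6, 8                                      # hypothetical sizes
hidden = torch.randn(B, H)
encoder_outputs = torch.randn(B, S, H)

attn_energies = torch.bmm(encoder_outputs, hidden.unsqueeze(2)).squeeze(2)  # (B, S)
attn_weights = F.softmax(attn_energies, dim=1)         # normalize over source positions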
def forward(self, input, source_hids):
# input: bsz x input_embed_dim
# source_hids: srclen x bsz x output_embed_dim
# x: bsz x output_embed_dim
x = self.input_proj(input)
# compute attention
attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)
attn_scores = F.softmax(attn_scores.t()).t() # srclen x bsz
# sum weighted sources
x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)
x = F.tanh(self.output_proj(torch.cat((x, input), dim=1)))
return x, attn_scores
def forward(self, x, target_embedding, encoder_out):
residual = x
# attention
x = (self.in_projection(x) + target_embedding) * math.sqrt(0.5)
x = self.bmm(x, encoder_out[0])
# softmax over last dim
sz = x.size()
x = F.softmax(x.view(sz[0] * sz[1], sz[2]))
x = x.view(sz)
attn_scores = x
x = self.bmm(x, encoder_out[1])
# scale attention output; s * sqrt(1/s) == sqrt(s), counteracting the variance
# reduction from averaging over the s source positions
s = encoder_out[1].size(1)
x = x * (s * math.sqrt(1.0 / s))
# project back
x = (self.out_projection(x) + residual) * math.sqrt(0.5)
return x, attn_scores
def forward(self, x, y, x_mask):
"""
x = batch * len * h1
y = batch * h2
x_mask = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy)
return alpha
def softmax(x, dim=-1):
"""
TODO: change to use the default pyTorch implementation when available
Source: https://discuss.pytorch.org/t/why-softmax-function-cant-specify-the-dimension-to-operate/2637
:param x: tensor
:param dim: Dimension along which to apply the softmax. The elements along this
dimension are rescaled so that they sum to 1.
:return: tensor with the same shape as `x`, rescaled along `dim`
"""
input_size = x.size()
trans_input = x.transpose(dim, len(input_size) - 1)
trans_size = trans_input.size()
input_2d = trans_input.contiguous().view(-1, trans_size[-1])
try:
soft_max_2d = F.softmax(input_2d, 1)
except TypeError:
# Support older pytorch 0.2 release.
soft_max_2d = F.softmax(input_2d)
soft_max_nd = soft_max_2d.view(*trans_size)
return soft_max_nd.transpose(dim, len(input_size) - 1)
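# Illustrative use of the dim-agnostic softmax() helper above (hypothetical
# sizes): the requested dimension is rescaled so that it sums to 1.
import torch
import torch.nn.functional as F  # used inside the helper

x = torch.randn(2, 3, 4)
p = softmax(x, dim=1)
assert p.shape == x.shape
assert torch.allclose(p.sum(dim=1), torch.ones(2, 4))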
def getAttnOutput(input, attnScorer, winSize=0):
# Attention output following [Liu and Lane, Interspeech 2016]. `input` is
# seqlen x batchsize x dim; if winSize is 0, all time steps are used for the
# weighted averaging.
attnSeq = []
for i in range(input.size(0)):
curSeq = []
if i > 0:
leftBegin = 0
if winSize > 0:
leftBegin = max(0, i-winSize)
curSeq.append(input[leftBegin:i])
if i < input.size(0):
leftEnd = input.size(0)
if winSize > 0:
leftEnd = min(i+winSize+1, input.size(0))
curSeq.append(input[i:leftEnd])
curSeq = torch.cat(curSeq, 0)
cur = input[i:i+1].expand_as(curSeq)
attnScores = attnScorer( torch.cat([cur, curSeq], 2).view(-1, 2*input.size(2)) ) # get attention scores
transAttnScores = attnScores.view(curSeq.size(0), input.size(1)).transpose(0, 1) # batchSize X curSeqLen
smOut = F.softmax(transAttnScores).transpose(0, 1)
smOutSeq = smOut.unsqueeze(2).expand_as(curSeq)
weightedAvgSeq = (curSeq * smOutSeq).sum(0)
attnSeq.append(weightedAvgSeq)
attnSeq = torch.cat(attnSeq, 0)
return torch.cat([input, attnSeq], 2)
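# Note on shapes: torch.cat([input, attnSeq], 2) above expects each
# weightedAvgSeq to keep its leading time dimension, i.e. it assumes
# (curSeq * smOutSeq).sum(0) returns a 1 x batchSize x dim tensor (the
# pre-0.2 PyTorch reduction behaviour; newer versions would need
# sum(0, keepdim=True)). The returned tensor is then seqlen x batchSize x 2*dim.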
def sample(self, inputs, max_length):
targets, init_states = self.initialize(inputs, eval=False)
emb, output, hidden, context = init_states
outputs = []
samples = []
batch_size = targets.size(1)
num_eos = targets[0].data.byte().new(batch_size).zero_()
for i in range(max_length):
output, hidden = self.decoder.step(emb, output, hidden, context)
outputs.append(output)
dist = F.softmax(self.generator(output))
sample = dist.multinomial(1, replacement=False).view(-1).data
samples.append(sample)
# Stop if all sentences reach EOS.
num_eos |= (sample == lib.Constants.EOS)
if num_eos.sum() == batch_size: break
emb = self.decoder.word_lut(Variable(sample))
outputs = torch.stack(outputs)
samples = torch.stack(samples)
return samples, outputs
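# A minimal sketch (hypothetical sizes) of the per-step sampling above:
# softmax the generator logits, then draw one token id per batch element.
import torch
import torch.nn.functional as F

batch_size, vocab_size = 4, 10
logits = torch.randn(batch_size, vocab_size)     # stand-in for self.generator(output)
dist = F.softmax(logits, dim=1)
sample = torch.multinomial(dist, 1, replacement=False).view(-1)  # (batch_size,) token ids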
def test_forward_backward(self):
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from reid.loss import OIMLoss
criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
criterion.lut = torch.eye(3)
x = Variable(torch.randn(3, 3), requires_grad=True)
y = Variable(torch.range(0, 2).long())
loss = criterion(x, y)
loss.backward()
probs = F.softmax(x)
grads = probs.data - torch.eye(3)
abs_diff = torch.abs(grads - x.grad.data)
self.assertEquals(torch.log(probs).diag().sum(), -loss)
self.assertTrue(torch.max(abs_diff) < 1e-6)
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
if self.training:
roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
rois = roi_data[0]
# roi pool
pooled_features = self.roi_pool(features, rois)
x = pooled_features.view(pooled_features.size()[0], -1)
x = self.fc6(x)
x = F.dropout(x, training=self.training)
x = self.fc7(x)
x = F.dropout(x, training=self.training)
cls_score = self.score_fc(x)
cls_prob = F.softmax(cls_score)
bbox_pred = self.bbox_fc(x)
if self.training:
self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)
return cls_prob, bbox_pred, rois
def _decode(self, tokens, encoder_outs):
# wrap in Variable
tokens = Variable(tokens, volatile=True)
avg_probs = None
avg_attn = None
for model, encoder_out in zip(self.models, encoder_outs):
decoder_out, attn = model.decoder(tokens, encoder_out)
probs = F.softmax(decoder_out[:, -1, :]).data
attn = attn[:, -1, :].data
if avg_probs is None or avg_attn is None:
avg_probs = probs
avg_attn = attn
else:
avg_probs.add_(probs)
avg_attn.add_(attn)
avg_probs.div_(len(self.models))
avg_probs.log_()
avg_attn.div_(len(self.models))
return avg_probs, avg_attn
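# The ensembling step above in isolation (hypothetical sizes): average the
# per-model softmax distributions, then take the log of the average.
import torch
import torch.nn.functional as F

vocab = 10
model_logits = [torch.randn(3, vocab) for _ in range(2)]   # two stand-in models
avg_probs = sum(F.softmax(l, dim=1) for l in model_logits) / len(model_logits)
avg_log_probs = avg_probs.log()
assert torch.allclose(avg_probs.sum(dim=1), torch.ones(3))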
def forward(self, x, y, x_mask):
"""
Args:
x: batch * len * hdim1
y: batch * hdim2
x_mask: batch * len (1 for padding, 0 for true)
Output:
alpha = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.normalize:
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy)
else:
alpha = xWy.exp()
return alpha
def forward(self, x, x_mask):
"""
Args:
x: batch * len * hdim
x_mask: batch * len (1 for padding, 0 for true)
Output:
alpha: batch * len
"""
x_flat = x.view(-1, x.size(-1))
scores = self.linear(x_flat).view(x.size(0), x.size(1))
scores.data.masked_fill_(x_mask.data, -float('inf'))
alpha = F.softmax(scores)
return alpha
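# A minimal sketch of the masking trick used above: padded positions are filled
# with -inf before the softmax, so their attention weights come out exactly zero.
import torch
import torch.nn.functional as F

scores = torch.randn(2, 4)
x_mask = torch.tensor([[False, False, True, True],
                       [False, False, False, True]])   # True marks padding
masked = scores.masked_fill(x_mask, -float('inf'))
alpha = F.softmax(masked, dim=1)
assert bool((alpha[x_mask] == 0).all())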
# ------------------------------------------------------------------------------
# Functional
# ------------------------------------------------------------------------------
def object_detection_gt_boxes(self, image_path, gt_boxes):
min_score = 1/150.
image = cv2.imread(image_path)
# print 'image.shape', image.shape
im_data, im_scales = self.get_image_blob_noscale(image)
gt_boxes[:, :4] = gt_boxes[:, :4] * im_scales[0]
# print 'im_data.shape', im_data.shape
# print 'im_scales', im_scales
im_info = np.array(
[[im_data.shape[1], im_data.shape[2], im_scales[0]]],
dtype=np.float32)
object_result = self(im_data, im_info, gt_boxes)[0]
cls_prob_object, bbox_object, object_rois = object_result[:]
prob_object = F.softmax(cls_prob_object)
prob = prob_object.cpu().data
top_5_cls = torch.topk(prob[:, 1:], 5, dim=1)
# print 'im_scales[0]', im_scales[0]
return top_5_cls[1].numpy()
def forward(self, v, u):
"""
Input:
- v: N x D x H x W
- u: N x D
Returns:
- next_u: N x D
"""
N, K = v.size(0), self.hidden_dim
D, H, W = v.size(1), v.size(2), v.size(3)
v_proj = self.Wv(v) # N x K x H x W
u_proj = self.Wu(u) # N x K
u_proj_expand = u_proj.view(N, K, 1, 1).expand(N, K, H, W)
h = F.tanh(v_proj + u_proj_expand)
p = F.softmax(self.Wp(h).view(N, H * W)).view(N, 1, H, W)
self.attention_maps = p.data.clone()
v_tilde = (p.expand_as(v) * v).sum(2).sum(3).view(N, D)
next_u = u + v_tilde
return next_u
def new_att_module(self):
class NewAttModule(nn.Module):
def __init__(self):
super(NewAttModule, self).__init__()
def forward(self, linput, rinput):
self.lPad = linput.view(-1, linput.size(0), linput.size(1))
self.lPad = linput # self.lPad = Padding(0, 0)(linput) TODO: figure out why padding?
self.M_r = torch.mm(self.lPad, rinput.t())
self.alpha = F.softmax(self.M_r.transpose(0, 1))
self.Yl = torch.mm(self.alpha, self.lPad)
return self.Yl
att_module = NewAttModule()
if getattr(self, "att_module_master", None):
for (tar_param, src_param) in zip(att_module.parameters(), self.att_module_master.parameters()):
tar_param.grad.data = src_param.grad.data.clone()
return att_module
def forward(self, x):
x = F.relu(self.conv11(x))
x = self.bn11(x)
x = F.relu(self.conv12(x))
x = self.bn12(x)
x = F.relu(self.conv21(x))
x = self.bn21(x)
x = F.relu(self.conv22(x))
x = self.bn22(x)
x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
x = self.fc(x.view(x.size()[:2]))
x = F.softmax(x)
return x
def forward(self, x):
x = F.relu(self.conv11(x))
x = self.bn11(x)
x = self.offset12(x)
x = F.relu(self.conv12(x))
x = self.bn12(x)
x = self.offset21(x)
x = F.relu(self.conv21(x))
x = self.bn21(x)
x = self.offset22(x)
x = F.relu(self.conv22(x))
x = self.bn22(x)
x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
x = self.fc(x.view(x.size()[:2]))
x = F.softmax(x)
return x
def forward(self, x, trainable, fast=True):
q, img, ans, prog = x #Need ans for reinforce
if not trainable: ans = None #Safety
p = self.ProgramGenerator(q)
#Finicky handling of PG-EE transition
batch, sLen, v = p.size()
p = p.view(-1, v)
p = F.softmax(p)
p = p.view(batch, sLen, v)
p, pInds = t.max(p, 2)
pInds = pInds[:, :, 0]
p= p[:, :, 0]
a = self.ExecutionEngine((pInds, p, img), fast=fast)
return a
def forward(self, logits, labels):
softmaxes = F.softmax(logits)
confidences, predictions = torch.max(softmaxes, 1)
accuracies = predictions.eq(labels)
ece = Variable(torch.zeros(1)).type_as(logits)
for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
# Compute |confidence - accuracy| in each bin
in_bin = confidences.gt(bin_lower) * confidences.le(bin_upper)
prop_in_bin = in_bin.float().mean()
if prop_in_bin.data[0] > 0:
accuracy_in_bin = accuracies[in_bin].float().mean()
avg_confidence_in_bin = confidences[in_bin].mean()
ece += torch.abs(avg_confidence_in_bin- accuracy_in_bin) * prop_in_bin
return ece
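# The bin edges (self.bin_lowers / self.bin_uppers) are not shown in this
# snippet; presumably (an assumption) they come from an even partition of
# [0, 1], along these lines:
import torch

n_bins = 15                                    # hypothetical bin count
bin_boundaries = torch.linspace(0, 1, n_bins + 1)
bin_lowers = bin_boundaries[:-1]
bin_uppers = bin_boundaries[1:]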
def forward(self, x, y, x_mask):
"""
x = batch * len * h1
y = batch * h2
x_mask = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy, dim=1)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy, dim=1)
return alpha
def forward(self, X):
# input X is a 3D (BxDxN) or 4D (BxDxHxW) tensor
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2).contiguous()
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2).contiguous()
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights, B x N x K (normalized over the codewords)
A = F.softmax(scaledL2()(X, self.codewords, self.scale), dim=2)
# aggregate
E = aggregate()(A, X, self.codewords)
return E
def assign(R, S):
r"""
Calculate assignment weights for given residuals (:math:`R`) and scale (:math:`S`)
.. math::
a_{ik} = \frac{\exp(-s_k\|r_{ik}\|^2)}{\sum_{j=1}^K \exp(-s_j\|r_{ij}\|^2)}
Shape:
- Input: :math:`R\in\mathcal{R}^{B\times N\times K\times D}`, :math:`S\in\mathcal{R}^K` (where :math:`B` is batch, :math:`N` is the total number of features, :math:`K` is the number of codewords, and :math:`D` is the feature dimension.)
- Output: :math:`A\in\mathcal{R}^{B\times N\times K}`
"""
L = square_squeeze()(R)
K = S.size(0)
SL = L * S.view(1,1,K)
return F.softmax(SL, dim=2)  # normalize over the K codewords
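# A plain-tensor sketch of the same assignment rule, assuming square_squeeze()
# returns the squared L2 norm of each residual over D (an assumption -- it is
# not defined in this snippet; the sign of the learned scales determines the
# correspondence with the exp(-s_k ...) form in the docstring).
import torch
import torch.nn.functional as F

B, N, K, D = 2, 5, 4, 3                        # hypothetical sizes
R = torch.randn(B, N, K, D)                    # residuals r_ik
S = torch.randn(K)                             # per-codeword scales s_k
L = R.pow(2).sum(3)                            # squared norms, B x N x K
A = F.softmax(L * S.view(1, 1, K), dim=2)      # normalize over the K codewords
assert torch.allclose(A.sum(2), torch.ones(B, N))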
def forward(self, X):
if isinstance(X, tuple) or isinstance(X, list):
# for self-parallel mode, please see encoding.nn
return my_data_parallel(self, X)
elif not isinstance(X, Variable):
raise RuntimeError('unknown input type')
# input X is a 3D (BxDxN) or 4D (BxDxHxW) tensor
assert(X.size(1)==self.D)
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2).contiguous()
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2).contiguous()
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights, B x N x K (normalized over the codewords)
A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
# aggregate
E = aggregate(A, X, self.codewords)
return E
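# A plain-tensor sketch of the scaledL2/aggregate pair used above (hypothetical
# sizes; scaledL2 and aggregate themselves are custom functions not shown here):
import torch
import torch.nn.functional as F

B, N, K, D = 2, 6, 4, 3
X = torch.randn(B, N, D)                       # N descriptors per sample
C = torch.randn(K, D)                          # K codewords
s = torch.randn(K)                             # per-codeword smoothing factors

R = X.unsqueeze(2) - C.view(1, 1, K, D)        # residuals, B x N x K x D
SL = s.view(1, 1, K) * R.pow(2).sum(3)         # scaled squared distances, B x N x K
A = F.softmax(SL, dim=2)                       # assignment weights over the codewords
E = (A.unsqueeze(3) * R).sum(1)                # aggregated encoding, B x K x D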
def forward(self, input, compute_loss=False, avg_loss=True):
# compute posterior
en1 = F.softplus(self.en1_fc(input)) # en1_fc output
en2 = F.softplus(self.en2_fc(en1)) # encoder2 output
en2 = self.en2_drop(en2)
posterior_mean = self.mean_bn (self.mean_fc (en2)) # posterior mean
posterior_logvar = self.logvar_bn(self.logvar_fc(en2)) # posterior log variance
posterior_var = posterior_logvar.exp()
# take sample
eps = Variable(input.data.new().resize_as_(posterior_mean.data).normal_()) # noise
z = posterior_mean + posterior_var.sqrt() * eps # reparameterization
p = F.softmax(z) # mixture probability
p = self.p_drop(p)
# do reconstruction
recon = F.softmax(self.decoder_bn(self.decoder(p))) # reconstructed distribution over vocabulary
if compute_loss:
return recon, self.loss(input, recon, posterior_mean, posterior_logvar, posterior_var, avg_loss)
else:
return recon
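# The reparameterization step above in isolation (hypothetical sizes):
# z = mean + sqrt(var) * eps with eps ~ N(0, 1), then a softmax gives the
# mixture proportions fed to the decoder.
import torch
import torch.nn.functional as F

posterior_mean = torch.randn(4, 10)
posterior_logvar = torch.randn(4, 10)
eps = torch.randn_like(posterior_mean)
z = posterior_mean + posterior_logvar.exp().sqrt() * eps
p = F.softmax(z, dim=1)                        # mixture probability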