def forward(self, input, target):
if input.dim()>2:
input = input.view(input.size(0),input.size(1),-1) # N,C,H,W => N,C,H*W
input = input.transpose(1,2) # N,C,H*W => N,H*W,C
input = input.contiguous().view(-1,input.size(2)) # N,H*W,C => N*H*W,C
target = target.view(-1,1)
logpt = F.log_softmax(input)
logpt = logpt.gather(1,target)
logpt = logpt.view(-1)
pt = Variable(logpt.data.exp())
if self.alpha is not None:
if self.alpha.type()!=input.data.type():
self.alpha = self.alpha.type_as(input.data)
at = self.alpha.gather(0,target.data.view(-1))
logpt = logpt * Variable(at)
loss = -1 * (1-pt)**self.gamma * logpt
if self.size_average: return loss.mean()
else: return loss.sum()
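For reference, a standalone functional sketch of the same focal-loss computation, assuming a modern PyTorch where Variable is unnecessary and log_softmax takes an explicit dim (the enclosing module's alpha, gamma and size_average attributes become plain arguments here):

import torch
import torch.nn.functional as F

def focal_loss(logits, target, gamma=2.0, alpha=None, size_average=True):
    # Sketch of the forward above for current PyTorch; not the original module.
    if logits.dim() > 2:                                   # N,C,H,W -> N*H*W,C
        logits = logits.view(logits.size(0), logits.size(1), -1)
        logits = logits.transpose(1, 2).contiguous().view(-1, logits.size(2))
    target = target.view(-1, 1)
    logpt = F.log_softmax(logits, dim=1).gather(1, target).view(-1)
    pt = logpt.exp()
    if alpha is not None:
        logpt = logpt * alpha.to(logits.dtype).gather(0, target.view(-1))
    loss = -(1 - pt) ** gamma * logpt
    return loss.mean() if size_average else loss.sum()

loss = focal_loss(torch.randn(4, 5, 8, 8), torch.randint(0, 5, (4, 8, 8)))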
def forward(self, x):
n_idx = 0
c_idx = 1
h_idx = 2
w_idx = 3
x = self.lookup_table(x)
x = x.unsqueeze(c_idx)
enc_outs = []
for encoder in self.encoders:
enc_ = F.relu(encoder(x))
k_h = enc_.size()[h_idx]
enc_ = F.max_pool2d(enc_, kernel_size=(k_h, 1))
enc_ = enc_.squeeze(w_idx)
enc_ = enc_.squeeze(h_idx)
enc_outs.append(enc_)
encoding = self.dropout(torch.cat(enc_outs, 1))
return F.log_softmax(self.logistic(encoding))
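The max-over-time pooling step above can be checked in isolation; a small sketch with assumed shapes, showing that a (k_h, 1) kernel collapses the time axis:

import torch
import torch.nn.functional as F

x = torch.randn(2, 100, 37, 1)                # N, out_channels, reduced_len, 1
pooled = F.max_pool2d(x, kernel_size=(x.size(2), 1))
print(pooled.squeeze(3).squeeze(2).shape)     # torch.Size([2, 100])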
def forward(self, x, y, x_mask):
"""
x = batch * len * h1
y = batch * h2
x_mask = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy)
return alpha
label_smoothed_cross_entropy.py (project: fairseq-py, author: facebookresearch)
def forward(self, model, sample):
"""Compute the loss for the given sample.
Returns a tuple with three elements:
1) the loss, as a Variable
2) the sample size, which is used as the denominator for the gradient
3) logging outputs to display while training
"""
net_output = model(**sample['net_input'])
input = F.log_softmax(net_output.view(-1, net_output.size(-1)))
target = sample['target'].view(-1)
loss = LabelSmoothedNLLLoss.apply(input, target, self.eps, self.padding_idx, self.weights)
sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
logging_output = {
'loss': loss.data[0],
'sample_size': sample_size,
}
return loss, sample_size, logging_output
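LabelSmoothedNLLLoss is fairseq's custom autograd function, not shown here; a rough, self-contained sketch of the same quantity with uniform smoothing and a padding index (an approximation for illustration, not fairseq's implementation):

import torch
import torch.nn.functional as F

def label_smoothed_nll(lprobs, target, eps, padding_idx):
    # lprobs: (N, V) log-probabilities, target: (N,) gold indices.
    nll = -lprobs.gather(1, target.unsqueeze(1)).squeeze(1)
    smooth = -lprobs.sum(dim=1)
    pad_mask = target.eq(padding_idx)
    nll = nll.masked_fill(pad_mask, 0.0)
    smooth = smooth.masked_fill(pad_mask, 0.0)
    eps_i = eps / lprobs.size(1)
    return ((1.0 - eps) * nll + eps_i * smooth).sum()

lprobs = F.log_softmax(torch.randn(6, 12), dim=-1)
loss = label_smoothed_nll(lprobs, torch.randint(1, 12, (6,)), eps=0.1, padding_idx=0)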
def forward(self, x, y, x_mask):
"""
Args:
x: batch * len * hdim1
y: batch * hdim2
x_mask: batch * len (1 for padding, 0 for true)
Output:
alpha = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.normalize:
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy)
else:
alpha = xWy.exp()
return alpha
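A minimal sketch of the masked bilinear attention pattern shared by these forward variants, using dummy shapes and skipping the optional self.linear projection:

import torch
import torch.nn.functional as F

x = torch.randn(2, 4, 8)                        # batch * len * h
y = torch.randn(2, 8)                           # batch * h
x_mask = torch.tensor([[0, 0, 1, 1],
                       [0, 0, 0, 1]], dtype=torch.bool)   # 1 = padding
scores = x.bmm(y.unsqueeze(2)).squeeze(2)       # batch * len
scores = scores.masked_fill(x_mask, -float('inf'))
alpha = F.softmax(scores, dim=-1)               # padded positions get weight 0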
def baseline_search(self, input, beam_size=None):
# This is the simple greedy search
batch_size = input.size(0)
hidden_feat = self.lstm_im(input.view(1, input.size()[0], input.size()[1]))[1]
x = Variable(torch.ones(1, batch_size,).type(torch.LongTensor) * self.start, requires_grad=False).cuda() # <start>
output = []
flag = torch.ones(batch_size)
for i in range(self.nseq):
input_x = self.encoder(x.view(1, -1))
output_feature, hidden_feat = self.lstm_word(input_x, hidden_feat)
output_t = self.decoder(output_feature.view(-1, output_feature.size(2)))
output_t = F.log_softmax(output_t)
logprob, x = output_t.max(1)
output.append(x)
flag[x.cpu().eq(self.end).data] = 0
if flag.sum() == 0:
break
output = torch.stack(output, 0).squeeze().transpose(0, 1).cpu().data
return output
def forward(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
# global average pooling layer
x = F.avg_pool2d(x, x.data.size()[-2:])
# flatten for input to fully-connected layer
x = x.view(x.size(0), -1)
x = self.fc(x)
return F.log_softmax(x, dim=1)
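The global-average-pooling line is equivalent to adaptive average pooling to 1x1; a quick check with an assumed feature-map shape:

import torch
import torch.nn.functional as F

x = torch.randn(2, 960, 7, 7)
a = F.avg_pool2d(x, x.size()[-2:]).view(x.size(0), -1)
b = F.adaptive_avg_pool2d(x, 1).view(x.size(0), -1)
assert torch.allclose(a, b)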
def forward(self, prev_samples, upper_tier_conditioning):
(batch_size, _, _) = upper_tier_conditioning.size()
prev_samples = self.embedding(
prev_samples.contiguous().view(-1)
).view(
batch_size, -1, self.q_levels
)
prev_samples = prev_samples.permute(0, 2, 1)
upper_tier_conditioning = upper_tier_conditioning.permute(0, 2, 1)
x = F.relu(self.input(prev_samples) + upper_tier_conditioning)
x = F.relu(self.hidden(x))
x = self.output(x).permute(0, 2, 1).contiguous()
return F.log_softmax(x.view(-1, self.q_levels)) \
.view(batch_size, -1, self.q_levels)
def forward(self, x):
nBatch = x.size(0)
x = F.max_pool2d(self.conv1(x), 2)
x = F.max_pool2d(self.conv2(x), 2)
x = x.view(nBatch, -1)
L = self.M*self.L
Q = L.mm(L.t()) + self.eps*Variable(torch.eye(self.nHidden)).cuda()
Q = Q.unsqueeze(0).expand(nBatch, self.nHidden, self.nHidden)
G = self.G.unsqueeze(0).expand(nBatch, self.nineq, self.nHidden)
z0 = self.qp_z0(x)
s0 = self.qp_s0(x)
h = z0.mm(self.G.t())+s0
e = Variable(torch.Tensor())
inputs = self.qp_o(x)
x = QPFunction()(Q, inputs, G, h, e, e)
x = x[:,:10]
return F.log_softmax(x)
def forward(self, x):
nBatch = x.size(0)
# FC-ReLU-QP-FC-Softmax
x = x.view(nBatch, -1)
x = F.relu(self.fc1(x))
Q = self.Q.unsqueeze(0).expand(nBatch, self.Q.size(0), self.Q.size(1))
p = -x.view(nBatch,-1)
G = self.G.unsqueeze(0).expand(nBatch, self.G.size(0), self.G.size(1))
h = self.h.unsqueeze(0).expand(nBatch, self.h.size(0))
A = self.A.unsqueeze(0).expand(nBatch, self.A.size(0), self.A.size(1))
b = self.b.unsqueeze(0).expand(nBatch, self.b.size(0))
x = QPFunction(verbose=False)(Q, p.double(), G, h, A, b).float()
x = self.fc2(x)
return F.log_softmax(x)
def feedforward_test():
    import os
    import uuid
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.autograd import Variable
    import torch2c
    # Build a small two-layer MLP with randomly initialised weights.
    fc1 = nn.Linear(10, 20)
    fc1.weight.data.normal_(0.0, 1.0)
    fc1.bias.data.normal_(0.0, 1.0)
    fc2 = nn.Linear(20, 2)
    fc2.weight.data.normal_(0.0, 1.0)
    fc2.bias.data.normal_(0.0, 1.0)
    model = lambda x: F.log_softmax(fc2(F.relu(fc1(x))))
    data = Variable(torch.rand(10, 10))
    out_path = 'out'
    if not os.path.isdir(out_path):
        os.mkdir(out_path)
    uid = str(uuid.uuid4())
    # Trace the graph on sample data and emit/compile C code via torch2c.
    torch2c.compile(model(data), 'feedforward', os.path.join(out_path, uid), compile_test=True)
def forward(self, x):
if self.deep:
x = x.view(-1, 28*28)
for fc in self.fcs[:-1]:
x = F.relu(fc(x))
x = self.fcs[-1](x)
return F.log_softmax(x)
else:
x = x.view(-1, 28*28)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = F.relu(self.fc4(x))
x = self.fc5(x)
return F.log_softmax(x)
def forward(self, x, y, x_mask):
"""
x = batch * len * h1
y = batch * h2
x_mask = batch * len
"""
Wy = self.linear(y) if self.linear is not None else y
xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
xWy.data.masked_fill_(x_mask.data, -float('inf'))
if self.training:
# In training we output log-softmax for NLL
alpha = F.log_softmax(xWy, dim=1)
else:
# ...Otherwise 0-1 probabilities
alpha = F.softmax(xWy, dim=1)
return alpha
def forward(self, qu, w, cand):
qu = Variable(qu)
w = Variable(w)
cand = Variable(cand)
embed_q = self.embed_B(qu)
embed_w1 = self.embed_A(w)
embed_w2 = self.embed_C(w)
embed_c = self.embed_C(cand)
#pdb.set_trace()
q_state = torch.sum(embed_q, 1).squeeze(1)
w1_state = torch.sum(embed_w1, 1).squeeze(1)
w2_state = torch.sum(embed_w2, 1).squeeze(1)
for _ in range(self.config.hop):
sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
sent_att = F.softmax(sent_dot)
a_dot = torch.mm(sent_att, w2_state)
a_dot = self.H(a_dot)
q_state = torch.add(a_dot, q_state)
f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
score = F.log_softmax(f_feat)
return score
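The hop loop above is repeated softmax attention over bag-of-words memory states; a shape-level sketch with dummy tensors that omits the self.H projection (all dimensions assumed):

import torch
import torch.nn.functional as F

q_state = torch.randn(1, 20)        # summed query embedding
w1_state = torch.randn(50, 20)      # memory keys (50 sentences)
w2_state = torch.randn(50, 20)      # memory values
for _ in range(3):                  # hops
    att = F.softmax(q_state @ w1_state.t(), dim=1)    # 1 x 50 attention
    q_state = att @ w2_state + q_state                # read + residual update
scores = F.log_softmax(q_state @ torch.randn(30, 20).t(), dim=1)   # 1 x 30 candidates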
seq2seq_batched_10.py (project: Seq2Seq-on-Word-Sense-Disambiguition, author: lbwbowenLi)
def forward(self, word_input, last_hidden, encoder_outputs):
# Note: we run this one step at a time
# TODO: FIX BATCHING
# Get the embedding of the current input word (last output word)
word_embedded = self.embedding(word_input).view(1, 1, -1) # S=1 x B x N
word_embedded = self.dropout(word_embedded)
# Calculate attention weights and apply to encoder outputs
attn_weights = self.attn(last_hidden[-1], encoder_outputs)
context = attn_weights.bmm(encoder_outputs.transpose(0, 1)) # B x 1 x N
context = context.transpose(0, 1) # 1 x B x N
# Combine embedded input word and attended context, run through RNN
rnn_input = torch.cat((word_embedded, context), 2)
output, hidden = self.gru(rnn_input, last_hidden)
# Final output layer
output = output.squeeze(0) # B x N
output = F.log_softmax(self.out(torch.cat((output, context), 1)))
# Return final output, hidden state, and attention weights (for visualization)
return output, hidden, attn_weights
seq2seq_batched_10_autotest.py (same project and author) contains a verbatim copy of the forward shown above.
def forward(self, x, lengths):
batch_size, seq_length = x.size()[:2]
emb = Variable(torch.from_numpy(
self.initial_embeddings.take(x.numpy(), 0)),
volatile=not self.training)
h = Variable(torch.zeros(batch_size, self.model_dim), volatile=not self.training)
for t in range(seq_length):
inp = emb[:,t,:]
h = self.rnn(inp, h)
h = F.relu(self.l0(F.dropout(h.squeeze(), 0.5, self.training)))
h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
y = F.log_softmax(h)
return y
def forward(self, x, lengths):
batch_size = x.size(0)
max_len = max(lengths)
emb = Variable(torch.from_numpy(
self.initial_embeddings.take(x.numpy(), 0)),
volatile=not self.training)
for t in range(max_len):
indices = []
for i, l in enumerate(lengths):
if l >= max(lengths) - t:
indices.append(i)
# Build batch.
dynamic_batch_size = len(indices)
inp = Variable(torch.FloatTensor(dynamic_batch_size, self.word_embedding_dim), volatile=not self.training)
h = Variable(torch.FloatTensor(dynamic_batch_size, self.model_dim), volatile=not self.training)
output = self.rnn(inp, h)
hn = output
h = F.relu(self.l0(F.dropout(hn.squeeze(), 0.5, self.training)))
h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
y = F.log_softmax(h)
return y
def forward(self, x, lengths):
batch_size = x.size(0)
max_len = max(lengths)
emb = Variable(torch.from_numpy(
self.initial_embeddings.take(x.numpy(), 0)),
volatile=not self.training)
inp = Variable(torch.FloatTensor(emb.size()), volatile=not self.training)
h0 = Variable(torch.FloatTensor(1, batch_size, self.model_dim), volatile=not self.training)
_, hn = self.rnn(emb, h0)
h = F.relu(self.l0(F.dropout(hn.squeeze(), 0.5, self.training)))
h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
y = F.log_softmax(h)
return y
def forward(self, input, hidden, encoder_output, encoder_outputs):
embedded = self.embedding(input).view(1, 1, -1)
embedded = self.dropout(embedded)
attn_weights = F.softmax(
self.attn(torch.cat((embedded[0], hidden[0]), 1)))
attn_weights = attn_weights.cuda() if use_cuda else attn_weights
attn_applied = torch.bmm(attn_weights.unsqueeze(0),
encoder_outputs.unsqueeze(0))
attn_applied = attn_applied.cuda() if use_cuda else attn_applied
output = torch.cat((embedded[0], attn_applied[0]), 1)
output = output.cuda() if use_cuda else output
output = self.attn_combine(output).unsqueeze(0)
for i in range(self.n_layers):
output = F.relu(output)
output = output.cuda() if use_cuda else output
output, hidden = self.gru(output, hidden)
output = F.log_softmax(self.out(output[0]))
output = output.cuda() if use_cuda else output
return output, hidden, attn_weights
def sample(self, seed, maximumLength, T = 1):
h = self.h0(seed).view(self.layers, 1, self.H)
accumulator = ["START"]
for _ in range(maximumLength):
i = self.targetsOfSymbols([accumulator[-1]])[:,0]
output, h = self(i,h)
distribution = output.data.view(-1)/T
distribution = F.log_softmax(distribution).data
distribution = distribution.exp()
c = torch.multinomial(distribution,1)[0]
if self.lexicon[c] == "END": break
accumulator.append(self.lexicon[c])
return accumulator[1:]
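The division by T before log_softmax is ordinary temperature sampling; a standalone sketch showing how larger T flattens the sampling distribution:

import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 1.0, 0.5])
for T in (0.5, 1.0, 2.0):
    probs = F.log_softmax(logits / T, dim=0).exp()
    idx = torch.multinomial(probs, 1).item()    # higher T -> flatter distribution
    print(T, [round(p, 3) for p in probs.tolist()], idx)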
def sample(self, features):
result = ["START"]
# (1,1,F)
features = features.view(-1).unsqueeze(0).unsqueeze(0)
#features: 1x1x2560
states = None
while True:
e = self.embedding(variable([symbolToIndex[result[-1]]]).view((1,-1)))
recurrentInput = torch.cat((features,e),2)
output, states = self.rnn(recurrentInput,states)
distribution = self.tokenPrediction(output).view(-1)
distribution = F.log_softmax(distribution).data.exp()
draw = torch.multinomial(distribution,1)[0]
c = LEXICON[draw]
if len(result) > 20 or c == "END":
return result[1:]
else:
result.append(c)
def cross_entropy2d(input, target, weight=None, size_average=True):
# input: (n, c, h, w), target: (n, h, w)
n, c, h, w = input.size()
# log_p: (n, c, h, w)
log_p = F.log_softmax(input)
# log_p: (n*h*w, c)
log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
log_p = log_p.view(-1, c)
# target: (n*h*w,)
mask = target >= 0
target = target[mask]
loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
if size_average:
loss /= mask.data.sum()
return loss
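The boolean-mask indexing above (a mask shaped n x h x w x c applied to an (n*h*w, c) tensor) relies on old indexing semantics and may be rejected by current PyTorch releases; a modernized, self-contained sketch of the same masked 2-D cross-entropy, offered as an assumption-laden rewrite rather than the original project's code:

import torch
import torch.nn.functional as F

def cross_entropy2d_modern(logits, target, weight=None):
    # logits: (n, c, h, w); target: (n, h, w) with negative labels ignored
    n, c, h, w = logits.size()
    log_p = F.log_softmax(logits, dim=1).permute(0, 2, 3, 1).reshape(-1, c)
    target = target.view(-1)
    keep = target >= 0
    loss = F.nll_loss(log_p[keep], target[keep], weight=weight, reduction='sum')
    return loss / keep.sum()

loss = cross_entropy2d_modern(torch.randn(2, 21, 16, 16), torch.randint(-1, 21, (2, 16, 16)))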
def forward(self, logits, target):
"""
:param logits: tensor with shape of [batch_size, seq_len, input_size]
:param target: tensor with shape of [batch_size, seq_len] of Long type filled with indexes to gather from logits
:return: tensor with shape of [batch_size] with perplexity evaluation
"""
[batch_size, seq_len, input_size] = logits.size()
logits = logits.view(-1, input_size)
log_probs = F.log_softmax(logits)
del logits
log_probs = log_probs.view(batch_size, seq_len, input_size)
target = target.unsqueeze(2)
out = t.gather(log_probs, dim=2, index=target).squeeze(2).neg()
ppl = out.mean(1).exp()
return ppl
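A shape-level sketch of the same per-sentence perplexity computation on dummy tensors, with the softmax dimension made explicit:

import torch
import torch.nn.functional as F

logits = torch.randn(3, 7, 10)                 # batch, seq_len, vocab
target = torch.randint(0, 10, (3, 7))
log_probs = F.log_softmax(logits, dim=-1)
nll = -log_probs.gather(2, target.unsqueeze(2)).squeeze(2)   # batch x seq_len
ppl = nll.mean(1).exp()                        # per-sentence perplexity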
def forward(self, input, hidden, encoder_outputs):
embedded = self.embedding(input).view(1, 1, -1)
embedded = self.dropout(embedded)
attn_weights = F.softmax(
self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
attn_applied = torch.bmm(attn_weights.unsqueeze(0),
encoder_outputs.unsqueeze(0))
output = torch.cat((embedded[0], attn_applied[0]), 1)
output = self.attn_combine(output).unsqueeze(0)
for i in range(self.n_layers):
output = F.relu(output)
output, hidden = self.gru(output, hidden)
output = F.log_softmax(self.out(output[0]), dim=1)
return output, hidden, attn_weights
def forward(self, batch):
# shape of batch (sequence length, batch size)
inputs = self.embed(batch.question) # shape (sequence length, batch_size, dimension of embedding)
batch_size = inputs.size()[1]
state_shape = self.config.n_cells, batch_size, self.config.d_hidden
if self.config.rnn_type.lower() == 'gru':
h0 = autograd.Variable(inputs.data.new(*state_shape).zero_())
outputs, ht = self.rnn(inputs, h0)
else:
h0 = c0 = autograd.Variable(inputs.data.new(*state_shape).zero_())
outputs, (ht, ct) = self.rnn(inputs, (h0, c0))
# shape of `outputs` - (sequence length, batch size, hidden size X num directions)
tags = self.hidden2tag(outputs.view(-1, outputs.size(2)))
# print(tags)
scores = F.log_softmax(tags)
return scores
def forward(self, x_in):
out = F.relu(F.max_pool3d(self.conv(x_in), (1, self.max_document_length,1)))
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.dropout(out, training=self.training)
out = self.fc2(out)
return F.log_softmax(out)
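Most snippets in this listing call F.log_softmax without a dim argument, which relies on implicit-dimension behaviour that newer PyTorch releases deprecate with a warning; a minimal sketch of the explicit form:

import torch
import torch.nn.functional as F

out = torch.randn(8, 10)                 # batch x classes
log_probs = F.log_softmax(out, dim=1)    # normalise over the class dimension
assert torch.allclose(log_probs.exp().sum(dim=1), torch.ones(8))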