def forward(self, qu, w, cand):
    qu = Variable(qu)
    w = Variable(w)
    cand = Variable(cand)
    embed_q = self.embed_B(qu)
    embed_w1 = self.embed_A(w)
    embed_w2 = self.embed_C(w)
    embed_c = self.embed_C(cand)
    # Bag-of-words sentence states: sum embeddings over the word dimension
    q_state = torch.sum(embed_q, 1).squeeze(1)
    w1_state = torch.sum(embed_w1, 1).squeeze(1)
    w2_state = torch.sum(embed_w2, 1).squeeze(1)
    # Multi-hop memory attention: attend over memories, then update the query state
    for _ in range(self.config.hop):
        sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
        sent_att = F.softmax(sent_dot)
        a_dot = torch.mm(sent_att, w2_state)
        a_dot = self.H(a_dot)
        q_state = torch.add(a_dot, q_state)
    # Score each candidate answer against the final query state
    f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
    score = F.log_softmax(f_feat)
    return score
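A quick usage sketch (assuming PyTorch 0.4 or later): passing dim explicitly to softmax and log_softmax, as below, avoids the implicit-dimension deprecation warning and makes the normalization axis obvious.

import torch
import torch.nn.functional as F

scores = torch.randn(4, 10)             # e.g. 4 query states scored against 10 candidates
att = F.softmax(scores, dim=1)          # each row sums to 1
log_p = F.log_softmax(scores, dim=1)    # log-probabilities, e.g. for nn.NLLLoss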
Python softmax() usage examples (source code)
Source file: seq2seq_batched_10.py
Project: Seq2Seq-on-Word-Sense-Disambiguition (author: lbwbowenLi)
def forward(self, hidden, encoder_outputs):
    max_len = encoder_outputs.size(0)
    this_batch_size = encoder_outputs.size(1)

    # Create variable to store attention energies
    attn_energies = Variable(torch.zeros(this_batch_size, max_len))  # B x S
    if USE_CUDA:
        attn_energies = attn_energies.cuda()

    # For each batch of encoder outputs
    for b in range(this_batch_size):
        # Calculate energy for each encoder output
        for i in range(max_len):
            attn_energies[b, i] = self.score(hidden[:, b], encoder_outputs[i, b].unsqueeze(0))

    # Normalize energies to weights in range 0 to 1, resize to B x 1 x S
    return F.softmax(attn_energies).unsqueeze(1)
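For reference, a vectorized sketch of the same computation, assuming a plain dot-product score (the snippet's self.score may be more elaborate); a single batched matmul replaces the double Python loop.

import torch
import torch.nn.functional as F

S, B, H = 7, 3, 16                        # hypothetical seq_len, batch, hidden sizes
hidden = torch.randn(1, B, H)             # decoder hidden state, as in the snippet
encoder_outputs = torch.randn(S, B, H)

# (B, 1, H) @ (B, H, S) -> (B, 1, S): one dot product per source position
energies = torch.bmm(hidden.transpose(0, 1), encoder_outputs.permute(1, 2, 0))
attn_weights = F.softmax(energies, dim=2)  # normalize over source positions, B x 1 x S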
def forward(self, output, context):
    batch_size = output.size(0)
    hidden_size = output.size(2)
    input_size = context.size(1)

    # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
    attn = torch.bmm(output, context.transpose(1, 2))
    if self.mask is not None:
        attn.data.masked_fill_(self.mask, -float('inf'))
    attn = F.softmax(attn.view(-1, input_size)).view(batch_size, -1, input_size)

    # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
    mix = torch.bmm(attn, context)

    # concat -> (batch, out_len, 2*dim)
    combined = torch.cat((mix, output), dim=2)
    # output -> (batch, out_len, dim)
    output = F.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)
    return output, attn
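A small sketch of how the boolean padding mask used above (self.mask) can be built from source lengths, so that padded positions receive zero attention weight; the shapes here are illustrative assumptions.

import torch
import torch.nn.functional as F

lengths = torch.tensor([5, 3, 7])                   # hypothetical source lengths
max_len = int(lengths.max())
mask = torch.arange(max_len).unsqueeze(0) >= lengths.unsqueeze(1)  # (batch, in_len), True at padding
scores = torch.randn(3, 1, max_len)                 # (batch, out_len, in_len)
scores = scores.masked_fill(mask.unsqueeze(1), float('-inf'))
weights = F.softmax(scores, dim=-1)                 # padded positions get weight 0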
def forward(self, h, att_feats, p_att_feats):
    # The p_att_feats here is already projected
    att_size = att_feats.numel() // att_feats.size(0) // self.rnn_size
    att = p_att_feats.view(-1, att_size, self.att_hid_size)

    att_h = self.h2att(h)                       # batch * att_hid_size
    att_h = att_h.unsqueeze(1).expand_as(att)   # batch * att_size * att_hid_size
    dot = att + att_h                           # batch * att_size * att_hid_size
    dot = F.tanh(dot)                           # batch * att_size * att_hid_size
    dot = dot.view(-1, self.att_hid_size)       # (batch * att_size) * att_hid_size
    dot = self.alpha_net(dot)                   # (batch * att_size) * 1
    dot = dot.view(-1, att_size)                # batch * att_size

    weight = F.softmax(dot)                     # batch * att_size
    att_feats_ = att_feats.view(-1, att_size, self.rnn_size)         # batch * att_size * att_feat_size
    att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1)  # batch * att_feat_size
    return att_res
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    conv_new1 = self.new_conv(features)
    r_score_map = self.rfcn_score(conv_new1)
    r_bbox_map = self.rfcn_bbox(conv_new1)
    psroi_pooled_cls = self.psroi_pool_cls(r_score_map, rois)
    psroi_pooled_loc = self.psroi_pool_loc(r_bbox_map, rois)
    bbox_pred = self.bbox_pred(psroi_pooled_loc)
    bbox_pred = torch.squeeze(bbox_pred)
    cls_score = self.cls_score(psroi_pooled_cls)
    cls_score = torch.squeeze(cls_score)
    cls_prob = F.softmax(cls_score)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def forward(self, input, hidden, encoder_output, encoder_outputs):
    embedded = self.embedding(input).view(1, 1, -1)
    embedded = self.dropout(embedded)

    attn_weights = F.softmax(
        self.attn(torch.cat((embedded[0], hidden[0]), 1)))
    attn_weights = attn_weights.cuda() if use_cuda else attn_weights
    attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                             encoder_outputs.unsqueeze(0))
    attn_applied = attn_applied.cuda() if use_cuda else attn_applied

    output = torch.cat((embedded[0], attn_applied[0]), 1)
    output = output.cuda() if use_cuda else output
    output = self.attn_combine(output).unsqueeze(0)

    for i in range(self.n_layers):
        output = F.relu(output)
        output = output.cuda() if use_cuda else output
        output, hidden = self.gru(output, hidden)

    output = F.log_softmax(self.out(output[0]))
    output = output.cuda() if use_cuda else output
    return output, hidden, attn_weights
def forward(self, x):
    x = self.conv(x)

    sizes = x.size()
    x = x.view(sizes[0], sizes[1] * sizes[2], sizes[3])  # Collapse feature dimension
    x = x.transpose(1, 2).transpose(0, 1).contiguous()   # TxNxH

    x = self.rnns(x)

    if not self._bidirectional:  # no need for lookahead layer in bidirectional
        x = self.lookahead(x)

    x = self.fc(x)
    x = x.transpose(0, 1)
    # identity in training mode, softmax in eval mode
    x = self.inference_softmax(x)
    return x
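The final comment refers to a softmax that is only active at evaluation time; a minimal sketch of that idea (the class name here is hypothetical, the project may implement it differently):

import torch.nn as nn
import torch.nn.functional as F

class InferenceSoftmax(nn.Module):
    def forward(self, x):
        if self.training:
            return x                    # raw logits, e.g. for a CTC loss
        return F.softmax(x, dim=-1)     # probabilities at inference time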
def forward(self, dec_out, enc_outs, enc_att=None, mask=None):
    """
    Parameters:
    -----------
    - dec_out: torch.Tensor(batch_size x hid_dim)
    - enc_outs: torch.Tensor(seq_len x batch_size x hid_dim)
    - enc_att: (optional), torch.Tensor(seq_len x batch_size x att_dim)
    - mask: (optional), torch.ByteTensor(batch_size x seq_len)
    """
    # (batch x seq_len)
    weights = self.scorer(dec_out, enc_outs, enc_att=enc_att)

    if mask is not None:
        # weights = weights * mask.float()
        weights.data.masked_fill_(1 - mask.data, -float('inf'))

    weights = F.softmax(weights, dim=1)

    # (eq 7)
    context = weights.unsqueeze(1).bmm(enc_outs.transpose(0, 1)).squeeze(1)
    # (eq 5) linear out combining context and hidden
    context = F.tanh(self.linear_out(torch.cat([context, dec_out], 1)))
    return context, weights
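A minimal sketch of the context computation in eq. (7) above, with dummy shapes (assumed here: seq_len=6, batch=2, hid_dim=32); the weights come from a softmax over the scorer output.

import torch
import torch.nn.functional as F

seq_len, batch, hid = 6, 2, 32
enc_outs = torch.randn(seq_len, batch, hid)
scores = torch.randn(batch, seq_len)           # stand-in for the scorer output
weights = F.softmax(scores, dim=1)             # (batch, seq_len)
context = weights.unsqueeze(1).bmm(enc_outs.transpose(0, 1)).squeeze(1)  # (batch, hid)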
def _content_focus(self, memory_vb):
    """
    variables needed:
        key_vb:    [batch_size x num_heads x mem_wid]
                   -> similarity key vector, compared to each row in memory
                   -> by cosine similarity
        beta_vb:   [batch_size x num_heads x 1]
                   -> NOTE: refer here: https://github.com/deepmind/dnc/issues/9
                   -> \in (1, +inf) after oneplus(); similarity key strength
                   -> amplifies or attenuates the precision of the focus
        memory_vb: [batch_size x mem_hei x mem_wid]
    returns:
        wc_vb:     [batch_size x num_heads x mem_hei]
                   -> the attention weight by content focus
    """
    K_vb = batch_cosine_sim(self.key_vb, memory_vb)    # [batch_size x num_heads x mem_hei]
    self.wc_vb = K_vb * self.beta_vb.expand_as(K_vb)   # [batch_size x num_heads x mem_hei]
    self.wc_vb = F.softmax(self.wc_vb.transpose(0, 2)).transpose(0, 2)
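A standalone sketch of the same content-based addressing, under the assumptions that the cosine similarity is computed directly (rather than through the project's batch_cosine_sim helper) and that oneplus() is 1 + softplus:

import torch
import torch.nn.functional as F

batch, heads, mem_hei, mem_wid = 2, 3, 8, 16
key = torch.randn(batch, heads, mem_wid)
beta = 1 + F.softplus(torch.randn(batch, heads, 1))   # "oneplus": strength in (1, +inf)
memory = torch.randn(batch, mem_hei, mem_wid)

# cosine similarity between each key and each memory row -> (batch, heads, mem_hei)
sim = F.cosine_similarity(
    key.unsqueeze(2).expand(batch, heads, mem_hei, mem_wid),
    memory.unsqueeze(1).expand(batch, heads, mem_hei, mem_wid),
    dim=-1)
wc = F.softmax(beta * sim, dim=-1)                    # content weights sum to 1 per head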
def forward(self, x):
    """
    A model for non-linear data that mixes multiple Gaussian distributions.
    Uses linear projections of a given input to generate a set of N Gaussian
    models' mixture components, means, and standard deviations.
    :param x: (num. samples, input dim.)
    :return: Mixture components, means, and standard deviations,
        each of shape (num. samples, num. mixtures)
    """
    x = F.tanh(self.projection(x))
    weights = F.softmax(self.weights_projection(x))
    means = self.mean_projection(x)
    stds = torch.exp(self.std_projection(x))
    return weights, means, stds
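A hypothetical follow-up showing how the returned mixture parameters could be sampled (one draw per row); the helper below is an illustrative addition, not part of the project.

import torch

def sample_mixture(weights, means, stds):
    # weights, means, stds: (num_samples, num_mixtures)
    idx = torch.multinomial(weights, 1)          # pick one mixture component per sample
    mean = means.gather(1, idx)
    std = stds.gather(1, idx)
    return mean + std * torch.randn_like(std)    # draw from the chosen Gaussian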
def forward(self, *hidden_states):
    if len(hidden_states) == 1:
        hidden_state = hidden_states[0]
        return F.softmax(F.tanh(self.projection(hidden_state))) * hidden_state
    elif len(hidden_states) == 2:
        left_hidden_state, right_hidden_state = hidden_states
        if self.mode == 0 or self.mode == 1:
            if self.mode == 0:
                left_attention_weights = F.softmax(F.tanh(self.projection(left_hidden_state)))
                right_attention_weights = F.softmax(F.tanh(self.projection(right_hidden_state)))
            elif self.mode == 1:
                left_attention_weights = F.softmax(F.tanh(self.left_projection(left_hidden_state)))
                right_attention_weights = F.softmax(F.tanh(self.right_projection(right_hidden_state)))
            return left_attention_weights * left_hidden_state, right_attention_weights * right_hidden_state
        elif self.mode == 2:
            hidden_state = torch.cat([left_hidden_state, right_hidden_state], dim=1)
            attention_weights = F.softmax(F.tanh(self.projection(hidden_state)))
            return attention_weights * left_hidden_state, attention_weights * right_hidden_state
def forward(self, last_state, states, mask=None):
    sequence_length, batch_size, hidden_dim = states.size()
    last_state = last_state.unsqueeze(0).expand(sequence_length, batch_size, last_state.size(1))

    if self.mode == "dot":
        energies = last_state * states
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "general":
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = last_state * states.bmm(expanded_projection)
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "concat":
        expanded_reduction = self.reduction.expand(sequence_length, *self.reduction.size())
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = F.tanh(torch.cat([last_state, states], dim=2).bmm(expanded_reduction))
        energies = energies.bmm(expanded_projection).squeeze()

    if type(mask) == torch.autograd.Variable:
        energies = energies + ((mask == 0).float() * -10000)

    attention_weights = F.softmax(energies)
    return attention_weights
def forward(self, last_state, states):
    if len(states.size()) == 2:
        states = states.unsqueeze(0)
    sequence_length, batch_size, state_dim = states.size()

    transformed_last_state = last_state @ self.projection
    transformed_last_state = transformed_last_state.expand(sequence_length, batch_size, self.encoder_dim)
    transformed_last_state = transformed_last_state.transpose(0, 1).contiguous()
    transformed_last_state = transformed_last_state.view(batch_size, -1)

    states = states.transpose(0, 1).contiguous()
    states = states.view(batch_size, -1)

    energies = transformed_last_state * states
    energies = energies.sum(dim=1)

    if self.encoder_dim is not None:
        attention_weights = torch.cat([torch.exp(energies[0]), F.softmax(energies[1:])], dim=0)
    else:
        attention_weights = F.softmax(energies)
    return attention_weights
def forward(self, x):
    x = F.relu(self.conv11(x))
    x = self.bn11(x)
    x = F.relu(self.conv12(x))
    x = self.bn12(x)
    x = F.relu(self.conv21(x))
    x = self.bn21(x)
    x = F.relu(self.conv22(x))
    x = self.bn22(x)
    x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
    x = self.fc(x.view(x.size()[:2]))
    x = F.softmax(x)
    return x
def forward(self, x):
    x = F.relu(self.conv11(x))
    x = self.bn11(x)
    x = self.offset12(x)
    x = F.relu(self.conv12(x))
    x = self.bn12(x)
    x = self.offset21(x)
    x = F.relu(self.conv21(x))
    x = self.bn21(x)
    x = self.offset22(x)
    x = F.relu(self.conv22(x))
    x = self.bn22(x)
    x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
    x = self.fc(x.view(x.size()[:2]))
    x = F.softmax(x)
    return x
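A side note on the trailing F.softmax in the two classifiers above (an assumption about the training setup, not something stated in the snippets): nn.CrossEntropyLoss expects raw logits and applies log-softmax internally, so the explicit softmax is usually wanted only at inference time.

import torch
import torch.nn as nn

logits = torch.randn(4, 10)                    # raw class scores from the network
labels = torch.randint(0, 10, (4,))
loss = nn.CrossEntropyLoss()(logits, labels)   # no explicit softmax needed here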
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    pooled_features = self.roi_pool(features, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    # x = self.fc6(x)
    # x = F.dropout(x, training=self.training)
    # x = self.fc7(x)
    # x = F.dropout(x, training=self.training)
    x = self.fcs(x)
    cls_score = self.score_fc(x)
    cls_prob = F.softmax(cls_score)
    bbox_pred = self.bbox_fc(x)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def forward(self, input, hidden, encoder_outputs):
    embedded = self.embedding(input).view(1, 1, -1)
    embedded = self.dropout(embedded)

    attn_weights = F.softmax(
        self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
    attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                             encoder_outputs.unsqueeze(0))

    output = torch.cat((embedded[0], attn_applied[0]), 1)
    output = self.attn_combine(output).unsqueeze(0)

    for i in range(self.n_layers):
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

    output = F.log_softmax(self.out(output[0]), dim=1)
    return output, hidden, attn_weights
def module_cls(self):
    """Return a simple module that concatenates its two inputs in the
    forward step.
    """
    class MyModule(nn.Module):
        def __init__(self):
            super(MyModule, self).__init__()
            self.dense = nn.Linear(20, 2)

        # pylint: disable=arguments-differ
        def forward(self, X0, X1):
            X = torch.cat((X0, X1), 1)
            X = F.softmax(self.dense(X), dim=-1)
            return X

    return MyModule
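A quick shape check matching the module above (a standalone stand-in rather than an actual MyModule instance, since the class is defined inside a test helper): two width-10 inputs concatenate to width 20 and produce per-row probabilities.

import torch
import torch.nn as nn
import torch.nn.functional as F

dense = nn.Linear(20, 2)
X0, X1 = torch.randn(4, 10), torch.randn(4, 10)
probs = F.softmax(dense(torch.cat((X0, X1), 1)), dim=-1)   # (4, 2)
print(probs.sum(dim=-1))                                   # each row sums to 1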