def _forward_age_cls(self, feat):
    '''
    Input:
        feat: CNN feature (ReLUed)
    Output:
        age_out: output age prediction (for evaluation)
        fc_out: final fc layer output (for computing the loss)
    '''
    # fc_out = self.age_cls(feat)
    fc_out = self.age_cls(F.relu(feat))

    if self.opts.cls_type == 'dex':
        # Deep EXpectation: expected value over per-year softmax probabilities
        age_scale = np.arange(self.opts.min_age, self.opts.max_age + 1, 1.0)
        age_scale = Variable(fc_out.data.new(age_scale)).unsqueeze(1)
        age_out = torch.matmul(F.softmax(fc_out), age_scale).view(-1)
    elif self.opts.cls_type == 'oh':
        # Ordinal Hyperplane: sum of sigmoid "age > k" indicators
        fc_out = F.sigmoid(fc_out)
        age_out = fc_out.sum(dim=1) + self.opts.min_age
    elif self.opts.cls_type == 'reg':
        # Regression: single scalar output, shifted by min_age
        age_out = fc_out.view(-1) + self.opts.min_age

    return age_out, fc_out
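
# A minimal, standalone sketch of the 'dex' (Deep EXpectation) readout used above,
# assuming a 0..100 age range and dummy logits: softmax over per-year logits, then
# a dot product with the age scale gives the expected age. Plain tensors are used
# here instead of the Variable wrapper.
import torch
import torch.nn.functional as F

min_age, max_age = 0, 100
logits = torch.randn(4, max_age - min_age + 1)                                     # [batch, num_age_bins]
age_scale = torch.arange(min_age, max_age + 1, dtype=torch.float32).unsqueeze(1)   # [num_age_bins, 1]
age_pred = torch.matmul(F.softmax(logits, dim=1), age_scale).view(-1)              # [batch]
print(age_pred.shape)  # torch.Size([4])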
def _compute_age(self, feat_relu):
    '''
    input:
        feat_relu: output of feat_embed layer (after relu)
    output:
        age_out
        age_fc_out
    '''
    age_fc_out = self.age_cls(feat_relu)

    if self.opts.cls_type == 'dex':
        # deep expectation
        age_scale = np.arange(self.opts.min_age, self.opts.max_age + 1, 1.0)
        age_scale = Variable(age_fc_out.data.new(age_scale)).unsqueeze(1)
        age_out = torch.matmul(F.softmax(age_fc_out), age_scale).view(-1)
    elif self.opts.cls_type == 'oh':
        # ordinal hyperplane
        age_fc_out = F.sigmoid(age_fc_out)
        age_out = age_fc_out.sum(dim=1) + self.opts.min_age
    elif self.opts.cls_type == 'reg':
        # regression
        age_out = age_fc_out.view(-1) + self.opts.min_age

    return age_out, age_fc_out
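
# A minimal, standalone sketch of the 'oh' (ordinal hyperplane) readout used above,
# assuming dummy logits: each output unit is a sigmoid "age > k" indicator, and
# summing them (plus min_age) yields a real-valued age estimate.
import torch

min_age = 0
oh_logits = torch.randn(4, 100)                            # [batch, num_thresholds] (dummy values)
age_pred = torch.sigmoid(oh_logits).sum(dim=1) + min_age   # [batch]
print(age_pred.shape)  # torch.Size([4])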
def forward(self, encoded_question, question_length, encoded_support, support_length,
            correct_start, answer2question, is_eval):
    # casting
    long_tensor = torch.cuda.LongTensor if encoded_question.is_cuda else torch.LongTensor
    answer2question = answer2question.type(long_tensor)

    # computing single time attention over question
    attention_scores = self._linear_question_attention(encoded_question)
    q_mask = misc.mask_for_lengths(question_length)
    attention_scores = attention_scores.squeeze(2) + q_mask
    question_attention_weights = F.softmax(attention_scores)
    question_state = torch.matmul(question_attention_weights.unsqueeze(1),
                                  encoded_question).squeeze(1)

    # Prediction
    # start
    start_input = torch.cat([question_state.unsqueeze(1) * encoded_support, encoded_support], 2)

    q_start_state = self._linear_q_start(start_input) + self._linear_q_start_q(question_state).unsqueeze(1)
    start_scores = self._linear_start_scores(F.relu(q_start_state)).squeeze(2)

    support_mask = misc.mask_for_lengths(support_length)
    start_scores = start_scores + support_mask
    _, predicted_start_pointer = start_scores.max(1)

    def align(t):
        return torch.index_select(t, 0, answer2question)

    if is_eval:
        start_pointer = predicted_start_pointer
    else:
        # use correct start during training, because p(end|start) should be optimized
        start_pointer = correct_start.type(long_tensor)
        predicted_start_pointer = align(predicted_start_pointer)
        start_scores = align(start_scores)
        start_input = align(start_input)
        encoded_support = align(encoded_support)
        question_state = align(question_state)
        support_mask = align(support_mask)

    # end
    u_s = []
    for b, p in enumerate(start_pointer):
        u_s.append(encoded_support[b, p.data[0]])
    u_s = torch.stack(u_s)

    end_input = torch.cat([encoded_support * u_s.unsqueeze(1), start_input], 2)
    q_end_state = self._linear_q_end(end_input) + self._linear_q_end_q(question_state).unsqueeze(1)
    end_scores = self._linear_end_scores(F.relu(q_end_state)).squeeze(2)
    end_scores = end_scores + support_mask

    max_support = support_length.max().data[0]

    if is_eval:
        end_scores += misc.mask_for_lengths(start_pointer, max_support, mask_right=False)

    _, predicted_end_pointer = end_scores.max(1)

    return start_scores, end_scores, predicted_start_pointer, predicted_end_pointer
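
# The forward pass above leans on misc.mask_for_lengths to push scores at invalid
# positions towards -inf before softmax/argmax. A hypothetical re-implementation,
# shown only to illustrate the masking idea; the real helper lives in the project's
# misc module and may differ in details.
import torch

def mask_for_lengths_sketch(lengths, max_length=None, mask_right=True, value=-1e6):
    # lengths: [batch] integer tensor; returns an additive [batch, max_length] mask
    max_length = int(lengths.max().item()) if max_length is None else max_length
    positions = torch.arange(max_length, device=lengths.device).unsqueeze(0)
    if mask_right:
        masked = positions >= lengths.unsqueeze(1)   # mask padding past the sequence length
    else:
        masked = positions < lengths.unsqueeze(1)    # mask positions left of the given index
    return masked.float() * value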
def test_functional_blas(self):
    def compare(fn, *args):
        unpacked_args = tuple(arg.data if isinstance(arg, Variable) else arg
                              for arg in args)
        unpacked_result = fn(*unpacked_args)
        packed_result = fn(*args).data
        # if non-Variable torch function returns a scalar, compare to scalar
        if not torch.is_tensor(unpacked_result):
            assert packed_result.dim() == 1
            assert packed_result.nelement() == 1
            packed_result = packed_result[0]
        self.assertEqual(packed_result, unpacked_result)

    def test_blas_add(fn, x, y, z):
        # Checks all signatures
        compare(fn, x, y, z)
        compare(fn, 0.5, x, y, z)
        compare(fn, 0.5, x, 0.25, y, z)

    def test_blas(fn, x, y):
        compare(fn, x, y)

    test_blas(torch.mm, Variable(torch.randn(2, 10)),
              Variable(torch.randn(10, 4)))
    test_blas_add(torch.addmm, Variable(torch.randn(2, 4)),
                  Variable(torch.randn(2, 10)), Variable(torch.randn(10, 4)))
    test_blas(torch.bmm, Variable(torch.randn(4, 2, 10)),
              Variable(torch.randn(4, 10, 4)))
    test_blas_add(torch.addbmm, Variable(torch.randn(2, 4)),
                  Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4)))
    test_blas_add(torch.baddbmm, Variable(torch.randn(4, 2, 4)),
                  Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4)))
    test_blas(torch.mv, Variable(torch.randn(2, 10)),
              Variable(torch.randn(10)))
    test_blas_add(torch.addmv, Variable(torch.randn(2)),
                  Variable(torch.randn(2, 10)), Variable(torch.randn(10)))
    test_blas(torch.ger, Variable(torch.randn(5)),
              Variable(torch.randn(6)))
    test_blas_add(torch.addr, Variable(torch.randn(5, 6)),
                  Variable(torch.randn(5)), Variable(torch.randn(6)))
    test_blas(torch.matmul, Variable(torch.randn(6)), Variable(torch.randn(6)))
    test_blas(torch.matmul, Variable(torch.randn(10, 4)), Variable(torch.randn(4)))
    test_blas(torch.matmul, Variable(torch.randn(5)), Variable(torch.randn(5, 6)))
    test_blas(torch.matmul, Variable(torch.randn(2, 10)), Variable(torch.randn(10, 4)))
    test_blas(torch.matmul, Variable(torch.randn(5, 2, 10)), Variable(torch.randn(5, 10, 4)))
    test_blas(torch.matmul, Variable(torch.randn(3, 5, 2, 10)), Variable(torch.randn(3, 5, 10, 4)))
    test_blas(torch.matmul, Variable(torch.randn(3, 5, 2, 10)), Variable(torch.randn(10)))
    test_blas(torch.matmul, Variable(torch.randn(10)), Variable(torch.randn(3, 5, 10, 4)))
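
# The calls above exercise torch.matmul's shape rules: 1-D x 1-D contracts to a
# scalar, 2-D x 1-D to a vector, and batched inputs broadcast over the leading
# dimensions. A quick check of the resulting shapes with plain tensors (the
# Variable wrapper is only needed for the autograd comparison in the test):
import torch

print(torch.matmul(torch.randn(6), torch.randn(6)).shape)             # torch.Size([])  -> scalar
print(torch.matmul(torch.randn(10, 4), torch.randn(4)).shape)         # torch.Size([10])
print(torch.matmul(torch.randn(5), torch.randn(5, 6)).shape)          # torch.Size([6])
print(torch.matmul(torch.randn(2, 10), torch.randn(10, 4)).shape)     # torch.Size([2, 4])
print(torch.matmul(torch.randn(3, 5, 2, 10), torch.randn(10)).shape)  # torch.Size([3, 5, 2])
print(torch.matmul(torch.randn(10), torch.randn(3, 5, 10, 4)).shape)  # torch.Size([3, 5, 4])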
def forward(self, input_d, input_e, mask_d=None, mask_e=None):
    '''
    Args:
        input_d: Tensor
            the decoder input tensor with shape = [batch, length_decoder, input_size]
        input_e: Tensor
            the child input tensor with shape = [batch, length_encoder, input_size]
        mask_d: Tensor or None
            the mask tensor for the decoder with shape = [batch, length_decoder]
        mask_e: Tensor or None
            the mask tensor for the encoder with shape = [batch, length_encoder]
    Returns: Tensor
        the energy tensor with shape = [batch, num_label, length_decoder, length_encoder]
    '''
    assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are required to be equal.'
    batch, length_decoder, _ = input_d.size()
    _, length_encoder, _ = input_e.size()

    # compute decoder part: [num_label, input_size_decoder] * [batch, input_size_decoder, length_decoder]
    # the output shape is [batch, num_label, length_decoder]
    out_d = torch.matmul(self.W_d, input_d.transpose(1, 2)).unsqueeze(3)
    # compute encoder part: [num_label, input_size_encoder] * [batch, input_size_encoder, length_encoder]
    # the output shape is [batch, num_label, length_encoder]
    out_e = torch.matmul(self.W_e, input_e.transpose(1, 2)).unsqueeze(2)

    # output shape [batch, num_label, length_decoder, length_encoder]
    if self.biaffine:
        # compute bi-affine part
        # [batch, 1, length_decoder, input_size_decoder] * [num_labels, input_size_decoder, input_size_encoder]
        # output shape [batch, num_label, length_decoder, input_size_encoder]
        output = torch.matmul(input_d.unsqueeze(1), self.U)
        # [batch, num_label, length_decoder, input_size_encoder] * [batch, 1, input_size_encoder, length_encoder]
        # output shape [batch, num_label, length_decoder, length_encoder]
        output = torch.matmul(output, input_e.unsqueeze(1).transpose(2, 3))
        output = output + out_d + out_e + self.b
    else:
        output = out_d + out_e + self.b

    if mask_d is not None:
        output = output * mask_d.unsqueeze(1).unsqueeze(3) * mask_e.unsqueeze(1).unsqueeze(2)

    return output
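
# A standalone sketch of the biaffine scoring above with made-up sizes, showing how
# matmul broadcasting yields the [batch, num_label, length_decoder, length_encoder]
# energy tensor. The parameter tensors mirror W_d, W_e, U and b of the module; all
# concrete dimensions below are assumptions for illustration only.
import torch

batch, len_d, len_e, d_dec, d_enc, num_label = 2, 5, 7, 16, 16, 3
input_d = torch.randn(batch, len_d, d_dec)
input_e = torch.randn(batch, len_e, d_enc)
W_d = torch.randn(num_label, d_dec)
W_e = torch.randn(num_label, d_enc)
U = torch.randn(num_label, d_dec, d_enc)
b = torch.randn(num_label, 1, 1)

out_d = torch.matmul(W_d, input_d.transpose(1, 2)).unsqueeze(3)   # [batch, num_label, len_d, 1]
out_e = torch.matmul(W_e, input_e.transpose(1, 2)).unsqueeze(2)   # [batch, num_label, 1, len_e]
out_u = torch.matmul(torch.matmul(input_d.unsqueeze(1), U),
                     input_e.unsqueeze(1).transpose(2, 3))        # [batch, num_label, len_d, len_e]
energy = out_u + out_d + out_e + b
print(energy.shape)  # torch.Size([2, 3, 5, 7])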
def query(self, x, y, predict=False):
    """
    Compute the nearest neighbor of the input queries.

    Arguments:
        x: A normalized matrix of queries of size (batch_size x key_dim)
        y: A matrix of correct labels (batch_size x 1)
    Returns:
        y_hat, A (batch_size x 1) matrix
            - the nearest neighbor to the query in memory
        softmax_score, A (batch_size x 1) matrix
            - A normalized score measuring the similarity between query and nearest neighbor
        loss - average loss for memory module
    """
    batch_size, dims = x.size()
    query = F.normalize(self.query_proj(x), dim=1)
    # query = F.normalize(torch.matmul(x, self.query_proj), dim=1)

    # Find the k-nearest neighbors of the query
    scores = torch.matmul(query, torch.t(self.keys_var))
    cosine_similarity, topk_indices_var = torch.topk(scores, self.top_k, dim=1)

    # softmax of cosine similarities - embedding
    softmax_score = F.softmax(self.softmax_temperature * cosine_similarity)

    # retrieve memory values - prediction
    topk_indices = topk_indices_var.detach().data
    y_hat_indices = topk_indices[:, 0]
    y_hat = self.values[y_hat_indices]

    loss = None
    if not predict:
        # Loss Function
        # topk_indices = (batch_size x topk)
        # topk_values  = (batch_size x topk x value_size)

        # collect the memory values corresponding to the topk scores
        batch_size, topk_size = topk_indices.size()
        flat_topk = flatten(topk_indices)
        flat_topk_values = self.values[topk_indices]
        topk_values = flat_topk_values.resize_(batch_size, topk_size)

        correct_mask = torch.eq(topk_values, torch.unsqueeze(y.data, dim=1)).float()
        correct_mask_var = ag.Variable(correct_mask, requires_grad=False)

        pos_score, pos_idx = torch.topk(torch.mul(cosine_similarity, correct_mask_var), 1, dim=1)
        neg_score, neg_idx = torch.topk(torch.mul(cosine_similarity, 1 - correct_mask_var), 1, dim=1)

        # zero-out correct scores if there are no correct values in topk values
        mask = 1.0 - torch.eq(torch.sum(correct_mask_var, dim=1), 0.0).float()
        pos_score = torch.mul(pos_score, torch.unsqueeze(mask, dim=1))

        # print(pos_score, neg_score)
        loss = MemoryLoss(pos_score, neg_score, self.margin)

    # Update memory
    self.update(query, y, y_hat, y_hat_indices)

    return y_hat, softmax_score, loss
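
# A minimal sketch of the retrieval step above: with L2-normalized queries and keys,
# a single matmul yields cosine similarities and topk picks the nearest memory slots.
# Memory size, key dimension and the stored labels are assumptions for this demo,
# not the module's actual memory contents.
import torch
import torch.nn.functional as F

memory_size, key_dim, batch_size, top_k = 128, 32, 4, 5
keys = F.normalize(torch.randn(memory_size, key_dim), dim=1)    # stand-in for self.keys_var
values = torch.randint(0, 10, (memory_size,))                   # stand-in for self.values (labels)
queries = F.normalize(torch.randn(batch_size, key_dim), dim=1)

scores = torch.matmul(queries, keys.t())                        # cosine similarities [batch, memory]
cosine_similarity, topk_indices = torch.topk(scores, top_k, dim=1)
y_hat = values[topk_indices[:, 0]]                               # label of the nearest key per query
print(y_hat.shape)  # torch.Size([4])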
def _age_gradient(self, feat_age):
    '''
    compute age branch gradient direction in age_embed layer
    input:
        feat_age: output of age_embed layer (before relu)
    '''
    cls = self.age_cls
    feat = feat_age.detach()
    feat.requires_grad = True
    feat.volatile = False
    feat = feat.clone()
    feat.retain_grad()

    age_fc_out = cls.cls(cls.relu(feat))

    if self.opts.cls_type == 'dex':
        # deep expectation
        age_scale = np.arange(self.opts.min_age, self.opts.max_age + 1, 1.0)
        age_scale = Variable(age_fc_out.data.new(age_scale)).unsqueeze(1)
        age_out = torch.matmul(F.softmax(age_fc_out), age_scale).view(-1)
    elif self.opts.cls_type == 'oh':
        # ordinal hyperplane
        age_fc_out = F.sigmoid(age_fc_out)
        age_out = age_fc_out.sum(dim=1) + self.opts.min_age
    elif self.opts.cls_type == 'reg':
        # regression
        age_out = age_fc_out.view(-1) + self.opts.min_age

    age_out.sum().backward()
    age_grad = feat.grad

    # normalize the gradient to a unit direction per sample
    age_grad = age_grad / age_grad.norm(p=2, dim=1, keepdim=True)
    age_grad.detach_()
    age_grad.volatile = False
    age_grad.requires_grad = False

    cls.cls.zero_grad()
    return age_grad
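
# The same idea expressed with current autograd, as a standalone sketch: take the
# gradient of the summed 'dex' age prediction with respect to the embedding feature
# and L2-normalize it per sample. The linear head and all sizes are stand-ins, not
# the module's actual age_cls classifier.
import torch
import torch.nn.functional as F

feat = torch.randn(4, 64, requires_grad=True)       # stand-in for the age_embed output
cls_head = torch.nn.Linear(64, 101)                 # stand-in for the age classifier head
age_scale = torch.arange(0, 101, dtype=torch.float32).unsqueeze(1)

age_out = torch.matmul(F.softmax(cls_head(F.relu(feat)), dim=1), age_scale).view(-1)
age_grad, = torch.autograd.grad(age_out.sum(), feat)
age_grad = age_grad / age_grad.norm(p=2, dim=1, keepdim=True)   # unit gradient direction per sample
print(age_grad.shape)  # torch.Size([4, 64])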