import torch
from torch.autograd import Variable


def fake_cumprod(vb):
"""
args:
vb: [hei x wid]
-> NOTE: we are lazy here so now it only supports cumprod along wid
"""
    # real_cumprod = torch.cumprod(vb.data, 1)
    vb = vb.unsqueeze(0)
    # build a stack of multiplicative masks: slice i keeps columns 0..i
    mul_mask_vb = Variable(torch.zeros(vb.size(2), vb.size(1), vb.size(2))).type_as(vb)
    for i in range(vb.size(2)):
        mul_mask_vb[i, :, :i+1] = 1
    # masked-out entries are set to 1 so they do not affect the product
    add_mask_vb = 1 - mul_mask_vb
    vb = vb.expand_as(mul_mask_vb) * mul_mask_vb + add_mask_vb
    # vb = torch.prod(vb, 2).transpose(0, 2) # 0.1.12
    vb = torch.prod(vb, 2, keepdim=True).transpose(0, 2) # 0.2.0
    # print(real_cumprod - vb.data) # NOTE: checked, ==0
    return vb
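# Quick sanity check of the helper against the built-in op -- a minimal sketch
# assuming the pre-0.4 Variable API used above; the shapes and the helper name
# _check_fake_cumprod are only illustrative.
def _check_fake_cumprod():
    vb = Variable(torch.rand(4, 5))              # [hei x wid]
    ours = fake_cumprod(vb).squeeze(0).data      # drop the leading dim added inside
    ref = torch.cumprod(vb.data, 1)              # built-in cumprod on the raw tensor
    print((ours - ref).abs().max())              # expected to be ~0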
Python cumprod() example source code
def test_cumprod(self):
    x = torch.rand(100, 100)
    res1 = torch.cumprod(x, 1)
    res2 = torch.Tensor()
    torch.cumprod(x, 1, out=res2)  # the legacy form torch.cumprod(res2, x, 1) is no longer supported
    self.assertEqual(res1, res2)
def forward(self, x):
    x0 = self.conv.forward(x.float())
    x = self.pool_mil(x0)
    x = x.squeeze(2).squeeze(2)
    # x1 = 1 - p for each class probability at each spatial location
    x1 = torch.add(torch.mul(x0.view(x.size(0), 1000, -1), -1), 1)
    cumprod = torch.cumprod(x1, 2)
    # noisy-OR pooling: combine the max-pooled response with 1 - prod(1 - p)
    out = torch.max(x, torch.add(torch.mul(cumprod[:, :, -1], -1), 1))
    #out = F.softmax(out)
    return out
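# The last element of a cumulative product equals the full product, so the
# expression above reduces to max(p_pooled, 1 - prod_i(1 - p_i)), i.e. a
# noisy-OR style MIL aggregation. A minimal standalone sketch of that
# equivalence (made-up shapes, plain tensors instead of the model above;
# the helper name is only illustrative):
def _noisy_or_sketch():
    p = torch.rand(2, 1000, 49)                          # [batch x classes x locations]
    via_cumprod = 1 - torch.cumprod(1 - p, 2)[:, :, -1]
    via_prod = 1 - torch.prod(1 - p, 2)
    print((via_cumprod - via_prod).abs().max())          # ~0 up to float error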
def forward(self, img, att_size=14):
    x0 = self.conv(img)
    x = self.pool_mil(x0)
    x = x.squeeze(2).squeeze(2)
    x = self.l1(x)
    # same 1 - prod(1 - p) aggregation as above, via the last cumprod element
    x1 = torch.add(torch.mul(x.view(x.size(0), 1000, -1), -1), 1)
    cumprod = torch.cumprod(x1, 2)
    out = torch.max(x, torch.add(torch.mul(cumprod[:, :, -1], -1), 1))
    return out
def test_cumprod(self):
x = torch.rand(100, 100)
res1 = torch.cumprod(x, 1)
res2 = torch.Tensor()
torch.cumprod(x, 1, out=res2)
self.assertEqual(res1, res2)
def forward(ctx, input, dim):
ctx.dim = dim
ctx.save_for_backward(input)
return torch.cumprod(input, dim=ctx.dim)
def _allocation(self, usage_vb, epsilon=1e-6):
    """
    computes allocation by sorting usage, a = a_t[\phi_t[j]]
    variables needed:
        usage_vb: [batch_size x mem_hei]
              -> indicates current memory usage; this equals u_t in the
                 paper when there is only one write head, but with
                 multiple write heads the usage should be updated while
                 iterating through the heads to account for the
                 allocation returned by this function
    returns:
        alloc_vb: [batch_size x num_write_heads x mem_hei]
    """
    # ensure values are not too small prior to cumprod
    usage_vb = epsilon + (1 - epsilon) * usage_vb
    # NOTE: we sort usage in ascending order
    sorted_usage_vb, indices_vb = torch.topk(usage_vb, k=self.mem_hei, dim=1, largest=False)
    # to imitate tf.cumprod(exclusive=True): https://discuss.pytorch.org/t/cumprod-exclusive-true-equivalences/2614/8
    cat_sorted_usage_vb = torch.cat((Variable(torch.ones(self.batch_size, 1)).type(self.dtype), sorted_usage_vb), 1)[:, :-1]
    # TODO: seems we have to wait for this PR: https://github.com/pytorch/pytorch/pull/1439
    prod_sorted_usage_vb = fake_cumprod(cat_sorted_usage_vb)
    # prod_sorted_usage_vb = torch.cumprod(cat_sorted_usage_vb, dim=1) # TODO: use this once the PR is ready
    # alloc_weight_vb = (1 - sorted_usage_vb) * prod_sorted_usage_vb # equ. (1) # 0.1.12
    alloc_weight_vb = (1 - sorted_usage_vb) * prod_sorted_usage_vb.squeeze() # equ. (1) # 0.2.0
    # invert the sort permutation so the weights line up with the original slot order
    _, indices_vb = torch.topk(indices_vb, k=self.mem_hei, dim=1, largest=False)
    alloc_weight_vb = alloc_weight_vb.gather(1, indices_vb)
    return alloc_weight_vb
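# The cat-with-ones-then-drop-last trick above is the usual way to emulate
# tf.cumprod(exclusive=True). On a PyTorch version where cumprod has a backward,
# the fake_cumprod detour is unnecessary -- a minimal sketch with plain tensors,
# made-up sizes, and an illustrative helper name (equ. (1) as in the snippet above):
def _exclusive_cumprod_sketch():
    sorted_usage = torch.rand(3, 8)                       # [batch_size x mem_hei]
    ones = torch.ones(sorted_usage.size(0), 1)
    # exclusive cumprod: product of all entries strictly before position j
    exclusive = torch.cumprod(torch.cat((ones, sorted_usage), 1)[:, :-1], dim=1)
    alloc_weight = (1 - sorted_usage) * exclusive
    return alloc_weight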
def cumprod(x, axis=0):
    def _cumprod(x, axis=axis):
        y = torch.cumprod(x, axis)
        return y
    def _compute_output_shape(x, axis=axis):
        # cumprod does not change the shape of its input
        return _get_shape(x)
    return get_op(_cumprod, output_shape=_compute_output_shape, arguments=[axis])(x)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
def backward(ctx, grad_output):
def safe_zeros_backward(inp, dim):
# note that the gradient is equivalent to:
# cumprod(exclusive, normal) * cumprod(exclusive, reverse), e.g.:
# input: [ a, b, c]
# cumprod(exclusive, normal): [1 , a, a * b]
# cumprod(exclusive, reverse): [b * c, c, 1]
# product: [b * c, a * c, a * b]
# and this is safe under input with 0s.
if inp.size(dim) == 1:
return grad_output
ones_size = torch.Size((inp.size()[:dim] + (1,) + inp.size()[dim + 1:]))
ones = Variable(grad_output.data.new(ones_size).fill_(1))
exclusive_normal_nocp = torch.cat((ones, inp.narrow(dim, 0, inp.size(dim) - 1)), dim)
exclusive_normal = exclusive_normal_nocp.cumprod(dim)
def reverse_dim(var, dim):
index = Variable(torch.arange(var.size(dim) - 1, -1, -1, out=var.data.new().long()))
return var.index_select(dim, index)
narrow_reverse = reverse_dim(inp.narrow(dim, 1, inp.size(dim) - 1), dim)
exclusive_reverse_nocp = torch.cat((ones, narrow_reverse), dim)
exclusive_reverse = reverse_dim(exclusive_reverse_nocp.cumprod(dim), dim)
grad_input = grad_output.expand_as(exclusive_normal).mul(exclusive_normal.mul(exclusive_reverse))
return grad_input
if ctx.dim is None:
input, = ctx.saved_variables
zero_idx = (input.data == 0).nonzero()
if zero_idx.dim() == 0:
return grad_output.mul(ctx.result).expand_as(input).div(input), None, None
elif zero_idx.size(0) > 1:
return (grad_output * 0).expand_as(input), None, None
else:
return safe_zeros_backward(input.contiguous().view(-1), 0).view_as(input), None, None
else:
input, output = ctx.saved_variables
dim = ctx.dim if ctx.dim >= 0 else ctx.dim + input.dim()
if ctx.keepdim is False and len(ctx.input_size) != 1:
grad_output = grad_output.unsqueeze(dim)
output = output.unsqueeze(dim)
zero_mask = input == 0
slice_zero_count = zero_mask.sum(dim, True)
total_zeros = slice_zero_count.data.sum()
if total_zeros == 0:
grad_input = grad_output.mul(output).expand_as(input).div(input)
else:
grad_input = safe_zeros_backward(input, dim)
return grad_input, None, None
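# The comment inside safe_zeros_backward can be checked numerically: the gradient
# of prod(x) w.r.t. x_j is the product of all the other entries, which equals the
# elementwise product of an exclusive forward cumprod and an exclusive reverse
# cumprod, and that form stays finite when the input contains a zero. A small
# standalone check with the current tensor API (no Variables; the values and the
# helper name are made up for illustration):
def _check_prod_grad_with_zero():
    x = torch.tensor([2.0, 0.0, 3.0], requires_grad=True)
    x.prod().backward()
    # expected gradient: [0*3, 2*3, 2*0] = [0, 6, 0]
    ones = torch.ones(1)
    excl_fwd = torch.cumprod(torch.cat((ones, x.detach()[:-1])), 0)                  # [1, a, a*b]
    excl_rev = torch.cumprod(torch.cat((ones, x.detach().flip(0)[:-1])), 0).flip(0)  # [b*c, c, 1]
    print(x.grad)               # tensor([0., 6., 0.])
    print(excl_fwd * excl_rev)  # tensor([0., 6., 0.])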