# Shared imports assumed by the conv2d() snippets collected below. Project-specific
# helpers that some snippets reference (_EfficientConv2d, sparsify_grad, binarize,
# get_vars_maybe_avg, gradgradcheck, use_cudnn) are defined in their respective sources.
import re
from collections import OrderedDict

import hickle as hkl  # used by define_teacher
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.nn import Parameter, init
from torch.nn.init import kaiming_normal


def test_backward_computes_backward_pass():
weight = torch.randn(4, 8, 3, 3).cuda()
input = torch.randn(4, 8, 4, 4).cuda()
input_var = Variable(input, requires_grad=True)
weight_var = Parameter(weight)
out_var = F.conv2d(
input=input_var,
weight=weight_var,
bias=None,
stride=1,
padding=1,
dilation=1,
groups=1,
)
out_var.backward(gradient=out_var.data.clone().fill_(1))
out = out_var.data
input_grad = input_var.grad.data
weight_grad = weight_var.grad.data
func = _EfficientConv2d(
stride=1,
padding=1,
dilation=1,
groups=1,
)
out_efficient = func.forward(weight, None, input)
weight_grad_efficient, _, input_grad_efficient = func.backward(
weight, None, input, out.clone().fill_(1))
assert(almost_equal(out, out_efficient))
assert(almost_equal(input_grad, input_grad_efficient))
assert(almost_equal(weight_grad, weight_grad_efficient))
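# `almost_equal` is not shown in this listing; a minimal sketch of what the assertions
# above assume (an elementwise tolerance comparison, tolerance chosen arbitrarily here):
def almost_equal(a, b, eps=1e-5):
    # Largest absolute elementwise difference between two same-shaped tensors.
    return (a - b).abs().max() < eps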
# Aggregated conv2d() usage examples; the _EfficientConv2d test above is from
# efficient_conv_test.py in the efficient_densenet_pytorch project (author: gpleiss).
def test_contig_wrong_stride_cudnn(self):
# x has to have batch_size 1 to test contiguous checks
x = torch.randn(1, 16, 5, 5).cuda()
stride = list(x.stride())
stride[0] = 20
# change the stride in dimension 0. the tensor is still contiguous because size[0] is 1
x.set_(x.storage(), 0, x.size(), stride)
self.assertTrue(x.is_contiguous())
F.conv_transpose2d(Variable(x), Variable(torch.randn(16, 1, 1, 1)).cuda())
F.conv2d(Variable(x), Variable(torch.randn(1, 16, 1, 1)).cuda())
def test_Conv2d_inconsistent_types(self):
inputs = Variable(torch.randn(4, 1, 7, 7).float())
weights = Variable(torch.randn(1, 1, 3, 3).double())
# inconsistent types should raise an exception
self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
# but it should work with the same type
nn.functional.conv2d(inputs.float(), weights.float())
def test_Conv2d_inconsistent_types_on_GPU_without_cudnn(self):
inputs = Variable(torch.randn(4, 1, 7, 7).float().cuda())
weights = Variable(torch.randn(1, 1, 3, 3).double().cuda())
bias = Variable(torch.randn(1).double().cuda())
torch.backends.cudnn.enabled = False
# inconsistent types should raise an exception
self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias))
# but it should work with the same type
nn.functional.conv2d(inputs.float(), weights.float(), bias.float())
def test_Conv2d_inconsistent_types_on_GPU_with_cudnn(self):
inputs = Variable(torch.randn(4, 1, 7, 7).float().cuda())
weights = Variable(torch.randn(1, 1, 3, 3).double().cuda())
bias = Variable(torch.randn(1).double().cuda())
torch.backends.cudnn.enabled = True
# inconsistent types should raise an exception
self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias))
# but it should work with the same type
nn.functional.conv2d(inputs.float(), weights.float(), bias.float())
def test_dirac_identity(self):
batch, in_c, out_c, size, kernel_size = 8, 3, 4, 5, 3
# Test 1D
input_var = Variable(torch.randn(batch, in_c, size))
filter_var = Variable(torch.zeros(out_c, in_c, kernel_size))
init.dirac(filter_var)
output_var = F.conv1d(input_var, filter_var)
input_tensor, output_tensor = input_var.data, output_var.data # Variables do not support nonzero
self.assertEqual(input_tensor[:, :, 1:-1], output_tensor[:, :in_c, :]) # Assert in_c outputs are preserved
assert torch.nonzero(output_tensor[:, in_c:, :]).numel() == 0 # Assert extra outputs are 0
# Test 2D
input_var = Variable(torch.randn(batch, in_c, size, size))
filter_var = Variable(torch.zeros(out_c, in_c, kernel_size, kernel_size))
init.dirac(filter_var)
output_var = F.conv2d(input_var, filter_var)
input_tensor, output_tensor = input_var.data, output_var.data
self.assertEqual(input_tensor[:, :, 1:-1, 1:-1], output_tensor[:, :in_c, :, :])
assert torch.nonzero(output_tensor[:, in_c:, :, :]).numel() == 0
# Test 3D
input_var = Variable(torch.randn(batch, in_c, size, size, size))
filter_var = Variable(torch.zeros(out_c, in_c, kernel_size, kernel_size, kernel_size))
init.dirac(filter_var)
output_var = F.conv3d(input_var, filter_var)
input_tensor, output_tensor = input_var.data, output_var.data
self.assertEqual(input_tensor[:, :, 1:-1, 1:-1, 1:-1], output_tensor[:, :in_c, :, :, :])
assert torch.nonzero(output_tensor[:, in_c:, :, :, :]).numel() == 0
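# A hedged illustration of the identity property checked above: a hand-built "Dirac"
# kernel (a single 1 at the spatial center on the channel diagonal) pushed through
# F.conv2d with matching padding copies the input channels through unchanged.
# All names and shapes below are illustrative only.
def dirac_identity_sketch():
    in_c, out_c, k = 3, 4, 3
    w = torch.zeros(out_c, in_c, k, k)
    for c in range(min(in_c, out_c)):
        w[c, c, k // 2, k // 2] = 1.0  # delta at the kernel center
    x = torch.randn(2, in_c, 8, 8)
    y = F.conv2d(Variable(x), Variable(w), padding=k // 2)
    # The first in_c output channels reproduce the input; the extra channel is all zeros.
    assert (y.data[:, :in_c] - x).abs().max() < 1e-6
    assert y.data[:, in_c:].abs().max() == 0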
def mobilenet(depth, width, depthwise_function):
cfg = [64, (128, 2), 128, (256, 2), 256, (512, 2), 512, 512, 512, 512, 512, (1024, 2), 1024]
cast = lambda x: x.cuda()
ni = 32
params = {'conv0': cast(kaiming_normal(torch.Tensor(ni, 3, 3, 3)))}
for i, x in enumerate(cfg):
no = x if isinstance(x, int) else x[0]
params['block%d.conv0' % i] = cast(kaiming_normal(torch.Tensor(ni, 1, 3, 3)))
params['block%d.conv1' % i] = cast(kaiming_normal(torch.Tensor(no, ni, 1, 1)))
ni = no
params = {k: Variable(v, requires_grad=True) for k, v in params.items()}
def f(input, params):
o = F.conv2d(input, params['conv0'], padding=1, stride=2)
o = F.relu(o, inplace=True)
for i, x in enumerate(cfg):
stride = 1 if isinstance(x, int) else x[1]
o = depthwise_function(o, params['block%d.conv0' % i], stride=stride, padding=1)
o = F.conv2d(o, params['block%d.conv1' % i])
o = F.relu(o, inplace=True)
return o
return f, params
def fconv2d(x, w, stride, padding):
return F.conv2d(x, w, stride=stride, padding=padding, groups=x.size(1))
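# fconv2d above performs a depthwise convolution by setting groups equal to the number
# of input channels (the depthwise_function expected by mobilenet). A hedged usage
# sketch with illustrative shapes, one 3x3 filter per channel:
def fconv2d_usage_sketch():
    x = Variable(torch.randn(2, 8, 16, 16))
    w = Variable(torch.randn(8, 1, 3, 3))  # (channels, 1, kH, kW) for depthwise filters
    y = fconv2d(x, w, stride=1, padding=1)
    assert y.size() == (2, 8, 16, 16)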
def define_model(params):
def conv2d(input, params, base, stride=1, pad=0):
return F.conv2d(input, params[base + '.weight'],
params[base + '.bias'], stride, pad)
def group(input, params, base, stride, n):
o = input
for i in range(0,n):
b_base = ('%s.block%d.conv') % (base, i)
x = o
o = conv2d(x, params, b_base + '0')
o = F.relu(o)
o = conv2d(o, params, b_base + '1', stride=i==0 and stride or 1, pad=1)
o = F.relu(o)
o = conv2d(o, params, b_base + '2')
if i == 0:
o += conv2d(x, params, b_base + '_dim', stride=stride)
else:
o += x
o = F.relu(o)
return o
# determine network size by parameters
blocks = [sum([re.match(r'group%d.block\d+.conv0.weight' % j, k) is not None
for k in params.keys()]) for j in range(4)]
def f(input, params, pooling_classif=True):
o = F.conv2d(input, params['conv0.weight'], params['conv0.bias'], 2, 3)
o = F.relu(o)
o = F.max_pool2d(o, 3, 2, 1)
o_g0 = group(o, params, 'group0', 1, blocks[0])
o_g1 = group(o_g0, params, 'group1', 2, blocks[1])
o_g2 = group(o_g1, params, 'group2', 2, blocks[2])
o_g3 = group(o_g2, params, 'group3', 2, blocks[3])
if pooling_classif:
o = F.avg_pool2d(o_g3, 7, 1, 0)
o = o.view(o.size(0), -1)
o = F.linear(o, params['fc.weight'], params['fc.bias'])
return o
return f
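# Hedged usage sketch for the functional ResNet above (illustrative only; a real params
# dict would be loaded from a checkpoint, as define_teacher does below with hickle):
#   f = define_model(params)
#   logits = f(Variable(torch.randn(1, 3, 224, 224)), params)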
def forward(self, X, S1, S2, config):
h = self.h(X)
r = self.r(h)
q = self.q(r)
v, _ = torch.max(q, dim=1, keepdim=True)
for i in range(0, config.k - 1):
q = F.conv2d(torch.cat([r, v], 1),
torch.cat([self.q.weight, self.w], 1),
stride=1,
padding=1)
v, _ = torch.max(q, dim=1, keepdim=True)
q = F.conv2d(torch.cat([r, v], 1),
torch.cat([self.q.weight, self.w], 1),
stride=1,
padding=1)
slice_s1 = S1.long().expand(config.imsize, 1, config.l_q, q.size(0))
slice_s1 = slice_s1.permute(3, 2, 1, 0)
q_out = q.gather(2, slice_s1).squeeze(2)
slice_s2 = S2.long().expand(1, config.l_q, q.size(0))
slice_s2 = slice_s2.permute(2, 1, 0)
q_out = q_out.gather(2, slice_s2).squeeze(2)
logits = self.fc(q_out)
return logits, self.sm(logits)
def forward(self, input):
output = F.conv2d(input, self.weight, self.bias, self.stride,
self.padding, self.dilation, self.groups)
return sparsify_grad(output, self.k, self.simplified)
def forward(self, input):
bw = binarize(self.weight)
return F.conv2d(input, bw, self.bias, self.stride,
self.padding, self.dilation, self.groups)
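# `binarize` is not defined in this listing; in BinaryConnect/XNOR-Net style layers it is
# typically a sign function over the real-valued weights. A minimal sketch of that
# assumption (any straight-through gradient handling is omitted):
def binarize_sketch(weight):
    return weight.sign()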
def forward(self, input, kernel):
self.weight = Parameter(kernel.data)
# print 'weight: ', self.weight.size()
# print 'bias: ', self.bias.size()
# print 'forward:', type(input.data), type(self.weight.data)
# print 'forward: ', input.size(), self.weight.size()
return F.conv2d(input, kernel, self.bias, self.stride,
self.padding, self.dilation, self.groups)
def test_calculate_gain_linear(self):
for fn in ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']:
gain = init.calculate_gain(fn)
self.assertEqual(gain, 1)
def run_conv_double_back_test(self, kern, stride, padding, chan_in, chan_out, batch_size,
inp_size, dilation, no_weight, groups=1, use_cuda=False, use_bias=True):
tensor = torch.Tensor(1)
if use_cuda:
tensor = tensor.cuda()
x = Variable(tensor.new(batch_size, chan_in, inp_size, inp_size), requires_grad=True)
x.data.normal_()
weight = Variable(tensor.new(chan_out, chan_in // groups, kern, kern), requires_grad=True)
weight.data.normal_()
if use_bias:
bias = Variable(tensor.new(chan_out), requires_grad=True)
bias.data.normal_()
else:
bias = None
def func(*inputs):
if no_weight:
lweight = weight
if use_bias:
lx, lbias = inputs
else:
lx, = inputs
lbias = None
else:
if use_bias:
lx, lweight, lbias = inputs
else:
lx, lweight = inputs
lbias = None
# We disable cudnn during forward to avoid finite difference imprecision issues
with use_cudnn(False):
out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups)
return out
if no_weight:
inputs = (x, bias)
else:
inputs = (x, weight, bias)
if not use_bias:
inputs = inputs[:-1]
dummy_out = func(*inputs)
grad_y = Variable(tensor.new(dummy_out.size()), requires_grad=True)
grad_y.data.normal_()
return gradgradcheck(func, inputs, (grad_y,))
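# Hedged usage sketch for the double-backward helper above (illustrative arguments; it is
# a test-case method, so it would run on a TestCase instance providing gradgradcheck):
#   self.run_conv_double_back_test(kern=3, stride=1, padding=1, chan_in=2, chan_out=4,
#                                  batch_size=2, inp_size=6, dilation=1, no_weight=False)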
def define_teacher(params_file):
""" Defines student resnet
Network size is determined from parameters, assuming
pre-activation basic-block resnet (ResNet-18 or ResNet-34)
"""
params_hkl = hkl.load(params_file)
params = OrderedDict({k: Variable(torch.from_numpy(v).cuda())
for k, v in params_hkl.items()})
blocks = [sum([re.match(r'group%d.block\d+.conv0.weight' % j, k) is not None
for k in list(params.keys())]) for j in range(4)]
def conv2d(input, params, base, stride=1, pad=0):
return F.conv2d(input, params[base + '.weight'], params[base + '.bias'], stride, pad)
def group(input, params, base, stride, n):
o = input
for i in range(0,n):
b_base = ('%s.block%d.conv') % (base, i)
x = o
o = conv2d(x, params, b_base + '0', pad=1, stride=i==0 and stride or 1)
o = F.relu(o, inplace=True)
o = conv2d(o, params, b_base + '1', pad=1)
if i == 0 and stride != 1:
o += F.conv2d(x, params[b_base + '_dim.weight'], stride=stride)
else:
o += x
o = F.relu(o, inplace=True)
return o
def f(inputs, params, pr=''):
inputs = Variable(inputs.data, volatile=True)
o = conv2d(inputs, params, pr+'conv0', 2, 3)
o = F.relu(o, inplace=True)
o = F.max_pool2d(o, 3, 2, 1)
o_g0 = group(o, params, pr+'group0', 1, blocks[0])
o_g1 = group(o_g0, params, pr+'group1', 2, blocks[1])
o_g2 = group(o_g1, params, pr+'group2', 2, blocks[2])
o_g3 = group(o_g2, params, pr+'group3', 2, blocks[3])
o = F.avg_pool2d(o_g3, 7, 1, 0)
o = o.view(o.size(0), -1)
o = F.linear(o, params[pr+'fc.weight'], params[pr+'fc.bias'])
return Variable(o.data), [Variable(v.data) for v in [o_g0, o_g1, o_g2, o_g3]]
return f, params
def run_conv_double_back_test(self, kern, stride, padding, chan_in, chan_out, batch_size,
inp_size, dilation, no_weight, groups=1, use_cuda=False, use_bias=True):
tensor = torch.Tensor(1)
if use_cuda:
tensor = tensor.cuda()
x = Variable(tensor.new(batch_size, chan_in, inp_size, inp_size), requires_grad=True)
x.data.normal_()
weight = Variable(tensor.new(chan_out, chan_in // groups, kern, kern), requires_grad=True)
weight.data.normal_()
if use_bias:
bias = Variable(tensor.new(chan_out), requires_grad=True)
bias.data.normal_()
else:
bias = None
def func(*inputs):
if no_weight:
lweight = weight
if use_bias:
lx, lbias = inputs
else:
lx, = inputs
lbias = None
else:
if use_bias:
lx, lweight, lbias = inputs
else:
lx, lweight = inputs
lbias = None
# We disable cudnn during forward to avoid finite difference imprecision issues
with cudnn.flags(enabled=False):
out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups)
return out
if no_weight:
inputs = (x, bias)
else:
inputs = (x, weight, bias)
if not use_bias:
inputs = inputs[:-1]
dummy_out = func(*inputs)
grad_y = Variable(tensor.new(dummy_out.size()), requires_grad=True)
grad_y.data.normal_()
return gradgradcheck(func, inputs, (grad_y,))
def forward(self, x, init=False):
if init is True:
# out_channels, in_channels // groups, * kernel_size
self.V.data.copy_(torch.randn(self.V.data.size()
).type_as(self.V.data) * 0.05)
v_norm = self.V.data / self.V.data.view(self.out_channels, -1)\
.norm(2, 1).view(self.out_channels, *(
[1] * (len(self.kernel_size) + 1))).expand_as(self.V.data)
x_init = F.conv2d(x, Variable(v_norm), None, self.stride,
self.padding, self.dilation, self.groups).data
t_x_init = x_init.transpose(0, 1).contiguous().view(
self.out_channels, -1)
m_init, v_init = t_x_init.mean(1).squeeze(
1), t_x_init.var(1).squeeze(1)
# out_features
scale_init = self.init_scale / \
torch.sqrt(v_init + 1e-10)
self.g.data.copy_(scale_init)
self.b.data.copy_(-m_init * scale_init)
scale_init_shape = scale_init.view(
1, self.out_channels, *([1] * (len(x_init.size()) - 2)))
m_init_shape = m_init.view(
1, self.out_channels, *([1] * (len(x_init.size()) - 2)))
x_init = scale_init_shape.expand_as(
x_init) * (x_init - m_init_shape.expand_as(x_init))
self.V_avg.copy_(self.V.data)
self.g_avg.copy_(self.g.data)
self.b_avg.copy_(self.b.data)
return Variable(x_init)
else:
v, g, b = get_vars_maybe_avg(
self, ['V', 'g', 'b'], self.training,
polyak_decay=self.polyak_decay)
scalar = torch.norm(v.view(self.out_channels, -1), 2, 1)
if len(scalar.size()) == 2:
scalar = g / scalar.squeeze(1)
else:
scalar = g / scalar
w = scalar.view(self.out_channels, *
([1] * (len(v.size()) - 1))).expand_as(v) * v
x = F.conv2d(x, w, b, self.stride,
self.padding, self.dilation, self.groups)
return x
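# Hedged restatement of the reparameterization computed in the non-init branch above
# (weight normalization, Salimans & Kingma 2016): each output filter is
# w_i = g_i * V_i / ||V_i||_2. Shapes and the *_demo names below are illustrative only.
def weight_norm_sketch():
    out_channels = 16
    V_demo = torch.randn(out_channels, 8, 3, 3)
    g_demo = torch.ones(out_channels)
    norms = V_demo.view(out_channels, -1).norm(2, 1).view(out_channels)  # per-filter L2 norm
    scale = (g_demo / norms).view(out_channels, 1, 1, 1)                 # g_i / ||V_i||_2
    return scale.expand_as(V_demo) * V_demo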