def main(config):
    """Build the loaders and the solver, then train or sample.

    ``get_loader(config)`` is unpacked into four loaders: the first two are
    handed to ``Solver``, the two test loaders are passed to ``train``.
    Nothing happens for any ``config.mode`` other than ``'train'`` or
    ``'sample'``.
    """
    (svhn_loader, mnist_loader,
     svhn_test_loader, mnist_test_loader) = get_loader(config)
    solver = Solver(config, svhn_loader, mnist_loader)
    cudnn.benchmark = True

    # Create the output directories if they are missing.
    for directory in (config.model_path, config.sample_path):
        if not os.path.exists(directory):
            os.makedirs(directory)

    mode = config.mode
    if mode == 'train':
        solver.train(svhn_test_loader, mnist_test_loader)
    elif mode == 'sample':
        solver.sample()
python类benchmark()的实例源码
def forward(self, input, weight, bias=None):
    """Compute the convolution output, using cuDNN when it accepts ``input``.

    The arguments are stashed with ``save_for_backward`` (``bias`` only when
    it is given).  The cuDNN path keeps the value returned by the forward
    call in ``self._cudnn_info``; the THNN path keeps its two scratch
    buffers in ``self._finput`` and ``self._fgrad_input``.

    Raises:
        ValueError: on the THNN path when ``self.groups != 1``.
    """
    output = input.new(*self._output_size(input, weight))

    saved = (input, weight) if bias is None else (input, weight, bias)
    self.save_for_backward(*saved)

    if cudnn.is_acceptable(input):
        self._cudnn_info = torch._C._cudnn_convolution_forward(
            input, weight, bias, output,
            self.pad[0], self.pad[1],
            self.stride[0], self.stride[1],
            self.groups, cudnn.benchmark)
        return output

    # TODO: implement groups for THNN
    if self.groups != 1:
        raise ValueError('THNN does not support groups')

    backend = type2backend[type(input)]
    self._finput = input.new()
    self._fgrad_input = input.new()
    # Note the THNN call takes kernel/stride/pad with index 1 before index 0.
    backend.SpatialConvolutionMM_updateOutput(
        backend.library_state, input, output, weight, bias,
        self._finput, self._fgrad_input,
        weight.size(3), weight.size(2),
        self.stride[1], self.stride[0],
        self.pad[1], self.pad[0])
    return output
def main(config):
    """Build the two loaders and the solver, then train or sample.

    Nothing happens for any ``config.mode`` other than ``'train'`` or
    ``'sample'``.
    """
    svhn_loader, mnist_loader = get_loader(config)
    solver = Solver(config, svhn_loader, mnist_loader)
    cudnn.benchmark = True

    # Create the output directories if they are missing.
    for directory in (config.model_path, config.sample_path):
        if not os.path.exists(directory):
            os.makedirs(directory)

    mode = config.mode
    if mode == 'train':
        solver.train()
    elif mode == 'sample':
        solver.sample()
def main(config):
    """Build the data loader and the solver, then train or sample.

    Nothing happens for any ``config.mode`` other than ``'train'`` or
    ``'sample'``.
    """
    cudnn.benchmark = True

    loader_options = dict(image_path=config.image_path,
                          image_size=config.image_size,
                          batch_size=config.batch_size,
                          num_workers=config.num_workers)
    data_loader = get_loader(**loader_options)
    solver = Solver(config, data_loader)

    # Create the output directories if they are missing.
    for directory in (config.model_path, config.sample_path):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Train and sample the images
    mode = config.mode
    if mode == 'train':
        solver.train()
    elif mode == 'sample':
        solver.sample()
def init_model(net, restore):
    """Initialise ``net``, optionally restore saved weights, move it to CUDA.

    ``restore`` may be ``None`` or a path; weights are only loaded when the
    path exists, in which case ``net.restored`` is set to ``True``.
    Returns the same ``net`` object.
    """
    # Fresh weight initialisation first; a checkpoint overrides it below.
    net.apply(init_weights)

    checkpoint_found = restore is not None and os.path.exists(restore)
    if checkpoint_found:
        state = torch.load(restore)
        net.load_state_dict(state)
        net.restored = True
        print("Restore model from: {}".format(os.path.abspath(restore)))

    # Use the GPU whenever one is available.
    if torch.cuda.is_available():
        cudnn.benchmark = True
        net.cuda()

    return net
motion_cnn.py 文件源码
项目:two-stream-action-recognition
作者: jeffreyhuang1
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def run(self):
    """Train from ``start_epoch`` to ``nb_epochs``, checkpointing every epoch.

    After each epoch the validation loss is fed to the LR scheduler.  When
    the validation precision beats ``self.best_prec1`` the video-level
    predictions are pickled, and ``save_checkpoint`` is called with the
    ``is_best`` flag.
    """
    self.build_model()
    self.resume_and_evaluate()
    cudnn.benchmark = True

    # The loop variable lives on the instance: it is what gets written into
    # the checkpoint below.
    for self.epoch in range(self.start_epoch, self.nb_epochs):
        self.train_1epoch()
        prec1, val_loss = self.validate_1epoch()
        is_best = prec1 > self.best_prec1
        # lr_scheduler
        self.scheduler.step(val_loss)
        # save model
        if is_best:
            self.best_prec1 = prec1
            # The with block closes the file; no explicit close() is needed.
            with open('record/motion/motion_video_preds.pickle', 'wb') as f:
                pickle.dump(self.dic_video_level_preds, f)

        # NOTE(review): nesting reconstructed from flattened source; the
        # checkpoint is assumed to be written every epoch since it takes is_best.
        save_checkpoint({
            'epoch': self.epoch,
            'state_dict': self.model.state_dict(),
            'best_prec1': self.best_prec1,
            'optimizer': self.optimizer.state_dict()
        }, is_best, 'record/motion/checkpoint.pth.tar', 'record/motion/model_best.pth.tar')
spatial_cnn.py 文件源码
项目:two-stream-action-recognition
作者: jeffreyhuang1
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def run(self):
    """Train from ``start_epoch`` to ``nb_epochs``, checkpointing every epoch.

    After each epoch the validation loss is fed to the LR scheduler.  When
    the validation precision beats ``self.best_prec1`` the video-level
    predictions are pickled, and ``save_checkpoint`` is called with the
    ``is_best`` flag.
    """
    self.build_model()
    self.resume_and_evaluate()
    cudnn.benchmark = True

    # The loop variable lives on the instance: it is what gets written into
    # the checkpoint below.
    for self.epoch in range(self.start_epoch, self.nb_epochs):
        self.train_1epoch()
        prec1, val_loss = self.validate_1epoch()
        is_best = prec1 > self.best_prec1
        # lr_scheduler
        self.scheduler.step(val_loss)
        # save model
        if is_best:
            self.best_prec1 = prec1
            # The with block closes the file; no explicit close() is needed.
            with open('record/spatial/spatial_video_preds.pickle', 'wb') as f:
                pickle.dump(self.dic_video_level_preds, f)

        # NOTE(review): nesting reconstructed from flattened source; the
        # checkpoint is assumed to be written every epoch since it takes is_best.
        save_checkpoint({
            'epoch': self.epoch,
            'state_dict': self.model.state_dict(),
            'best_prec1': self.best_prec1,
            'optimizer': self.optimizer.state_dict()
        }, is_best, 'record/spatial/checkpoint.pth.tar', 'record/spatial/model_best.pth.tar')
def init_model(net, restore):
    """Initialise ``net``, optionally restore saved weights, move it to CUDA.

    ``restore`` may be ``None`` or a path; weights are only loaded when the
    path exists, in which case ``net.restored`` is set to ``True``.
    Returns the same ``net`` object.
    """
    # Fresh weight initialisation first; a checkpoint overrides it below.
    net.apply(init_weights)

    checkpoint_found = restore is not None and os.path.exists(restore)
    if checkpoint_found:
        state = torch.load(restore)
        net.load_state_dict(state)
        net.restored = True
        print("Restore model from: {}".format(os.path.abspath(restore)))

    # Use the GPU whenever one is available.
    if torch.cuda.is_available():
        cudnn.benchmark = True
        net.cuda()

    return net
def _update_output(self, input, weight, bias):
    """Run the forward convolution and record which backend was used.

    ``self.use_cudnn`` is set here and read by the gradient methods.  With
    cuDNN older than version 6000, dilated convolutions fall back to THNN.
    The backend state (``_cudnn_info`` or ``_bufs``) is dropped right away
    when ``self.requires_grad`` is false.
    """
    self.use_cudnn = cudnn.is_acceptable(input)
    if self.use_cudnn and cudnn.version() < 6000:
        self.use_cudnn = not self.is_dilated()

    if not self.use_cudnn:
        # THNN path: one (initially empty) buffer list per group.
        self._bufs = [[] for _ in range(self.groups)]
        output = self._thnn('update_output', input, weight, bias)
        if not self.requires_grad:
            del self._bufs
        return output

    output = input.new(*self._output_size(input, weight))
    if self.transposed:
        conv_forward = torch._C._cudnn_convolution_transpose_full_forward
    else:
        conv_forward = torch._C._cudnn_convolution_full_forward
    self._cudnn_info = conv_forward(
        input, weight, bias, output,
        self.padding, self.stride, self.dilation,
        self.groups, cudnn.benchmark)
    if not self.requires_grad:
        del self._cudnn_info
    return output
def _grad_input(self, input, weight, grad_output):
    """Return the gradient w.r.t. ``input`` using the backend chosen in forward."""
    if not self.use_cudnn:
        return self._thnn('grad_input', input, weight, grad_output)

    grad_input = input.new().resize_as_(input)
    if self.transposed:
        # ConvTranspose uses the same kernels as regular convolution
        # but swaps forward and backward calls
        torch._C._cudnn_convolution_forward(
            grad_output, weight, grad_input,
            self._cudnn_info, cudnn.benchmark)
    else:
        torch._C._cudnn_convolution_backward_data(
            grad_output, grad_input, weight,
            self._cudnn_info, cudnn.benchmark)
    return grad_input
def _grad_params(self, input, weight, bias, grad_output):
    """Return ``(grad_weight, grad_bias)`` using the backend chosen in forward.

    On the cuDNN path each entry stays ``None`` unless the matching
    ``needs_input_grad`` flag is set (and, for the bias, ``bias`` is given).
    """
    if not self.use_cudnn:
        return self._thnn('grad_params', input, weight, bias, grad_output)

    grad_weight = None
    grad_bias = None

    if self.needs_input_grad[1]:
        grad_weight = weight.new().resize_as_(weight)
        torch._C._cudnn_convolution_backward_filter(
            grad_output, input, grad_weight,
            self._cudnn_info, cudnn.benchmark)

    if bias is not None and self.needs_input_grad[2]:
        grad_bias = bias.new().resize_as_(bias)
        torch._C._cudnn_convolution_backward_bias(
            grad_output, grad_bias, self._cudnn_info)

    return grad_weight, grad_bias
def _update_output(self, input, weight, bias):
    """Run the forward convolution and record which backend was used.

    ``self.use_cudnn`` is set here and read by the gradient methods.  With
    cuDNN older than version 6000, dilated convolutions fall back to THNN.
    The backend state (``_cudnn_info`` or ``_bufs``) is dropped right away
    when ``self.requires_grad`` is false.
    """
    self.use_cudnn = cudnn.is_acceptable(input)
    if self.use_cudnn and cudnn.version() < 6000:
        self.use_cudnn = not self.is_dilated()

    if not self.use_cudnn:
        # THNN path: one (initially empty) buffer list per group.
        self._bufs = [[] for _ in range(self.groups)]
        output = self._thnn('update_output', input, weight, bias)
        if not self.requires_grad:
            del self._bufs
        return output

    output = input.new(*self._output_size(input, weight))
    if self.transposed:
        conv_forward = torch._C._cudnn_convolution_transpose_full_forward
    else:
        conv_forward = torch._C._cudnn_convolution_full_forward
    self._cudnn_info = conv_forward(
        input, weight, bias, output,
        self.padding, self.stride, self.dilation,
        self.groups, cudnn.benchmark)
    if not self.requires_grad:
        del self._cudnn_info
    return output
def _grad_input(self, input, weight, grad_output):
    """Return the gradient w.r.t. ``input`` using the backend chosen in forward."""
    if not self.use_cudnn:
        return self._thnn('grad_input', input, weight, grad_output)

    grad_input = input.new().resize_as_(input)
    if self.transposed:
        # ConvTranspose uses the same kernels as regular convolution
        # but swaps forward and backward calls
        torch._C._cudnn_convolution_forward(
            grad_output, weight, grad_input,
            self._cudnn_info, cudnn.benchmark)
    else:
        torch._C._cudnn_convolution_backward_data(
            grad_output, grad_input, weight,
            self._cudnn_info, cudnn.benchmark)
    return grad_input
def _grad_params(self, input, weight, bias, grad_output):
    """Return ``(grad_weight, grad_bias)`` using the backend chosen in forward.

    On the cuDNN path each entry stays ``None`` unless the matching
    ``needs_input_grad`` flag is set (and, for the bias, ``bias`` is given).
    """
    if not self.use_cudnn:
        return self._thnn('grad_params', input, weight, bias, grad_output)

    grad_weight = None
    grad_bias = None

    if self.needs_input_grad[1]:
        grad_weight = weight.new().resize_as_(weight)
        torch._C._cudnn_convolution_backward_filter(
            grad_output, input, grad_weight,
            self._cudnn_info, cudnn.benchmark)

    if bias is not None and self.needs_input_grad[2]:
        grad_bias = bias.new().resize_as_(bias)
        torch._C._cudnn_convolution_backward_bias(
            grad_output, grad_bias, self._cudnn_info)

    return grad_weight, grad_bias
densenet_efficient_multi_gpu.py 文件源码
项目:temperature_scaling
作者: gpleiss
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def forward(self, weight, bias, input):
    """Run the cuDNN convolution forward pass and return its output.

    Every non-``None`` argument must be acceptable to cuDNN, otherwise an
    ``Exception`` is raised.  The value returned by the cuDNN call is kept
    in ``self._cudnn_info`` for the backward pass.
    """
    # Assert we're using cudnn
    # NOTE(review): the message names _EfficientBatchNorm although this
    # method performs a convolution; confirm against the enclosing class.
    for tensor in (weight, bias, input):
        if tensor is None or cudnn.is_acceptable(tensor):
            continue
        raise Exception('You must be using CUDNN to use _EfficientBatchNorm')

    res = input.new(*self._output_size(input, weight))
    padding = (self.padding, self.padding)
    stride = (self.stride, self.stride)
    dilation = (self.dilation, self.dilation)
    self._cudnn_info = torch._C._cudnn_convolution_full_forward(
        input, weight, bias, res,
        padding, stride, dilation,
        self.groups, cudnn.benchmark)
    return res
densenet_efficient_multi_gpu.py 文件源码
项目:temperature_scaling
作者: gpleiss
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def backward(self, weight, bias, input, grad_output):
    """Compute all gradients with cuDNN.

    Returns ``(grad_weight, grad_bias, grad_input)``; ``grad_bias`` is
    ``None`` when ``bias`` is ``None``.  Relies on ``self._cudnn_info``
    having been stored by the forward pass.
    """
    info = self._cudnn_info

    grad_input = input.new().resize_as_(input)
    torch._C._cudnn_convolution_backward_data(
        grad_output, grad_input, weight, info, cudnn.benchmark)

    grad_weight = weight.new().resize_as_(weight)
    torch._C._cudnn_convolution_backward_filter(
        grad_output, input, grad_weight, info, cudnn.benchmark)

    grad_bias = None
    if bias is not None:
        grad_bias = bias.new().resize_as_(bias)
        torch._C._cudnn_convolution_backward_bias(grad_output, grad_bias, info)

    return grad_weight, grad_bias, grad_input
densenet_efficient_multi_gpu.py 文件源码
项目:efficient_densenet_pytorch
作者: gpleiss
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def forward(self, weight, bias, input):
    """Run the cuDNN convolution forward pass and return its output.

    Every non-``None`` argument must be acceptable to cuDNN, otherwise an
    ``Exception`` is raised.  The value returned by the cuDNN call is kept
    in ``self._cudnn_info`` for the backward pass.
    """
    # Assert we're using cudnn
    # NOTE(review): the message names _EfficientBatchNorm although this
    # method performs a convolution; confirm against the enclosing class.
    for tensor in (weight, bias, input):
        if tensor is None or cudnn.is_acceptable(tensor):
            continue
        raise Exception('You must be using CUDNN to use _EfficientBatchNorm')

    res = input.new(*self._output_size(input, weight))
    padding = (self.padding, self.padding)
    stride = (self.stride, self.stride)
    dilation = (self.dilation, self.dilation)
    self._cudnn_info = torch._C._cudnn_convolution_full_forward(
        input, weight, bias, res,
        padding, stride, dilation,
        self.groups, cudnn.benchmark)
    return res
densenet_efficient_multi_gpu.py 文件源码
项目:efficient_densenet_pytorch
作者: gpleiss
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def backward(self, weight, bias, input, grad_output):
    """Compute all gradients with cuDNN.

    Returns ``(grad_weight, grad_bias, grad_input)``; ``grad_bias`` is
    ``None`` when ``bias`` is ``None``.  Relies on ``self._cudnn_info``
    having been stored by the forward pass.
    """
    info = self._cudnn_info

    grad_input = input.new().resize_as_(input)
    torch._C._cudnn_convolution_backward_data(
        grad_output, grad_input, weight, info, cudnn.benchmark)

    grad_weight = weight.new().resize_as_(weight)
    torch._C._cudnn_convolution_backward_filter(
        grad_output, input, grad_weight, info, cudnn.benchmark)

    grad_bias = None
    if bias is not None:
        grad_bias = bias.new().resize_as_(bias)
        torch._C._cudnn_convolution_backward_bias(grad_output, grad_bias, info)

    return grad_weight, grad_bias, grad_input
def forward(self, weight, bias, input):
    """Run the cuDNN convolution forward pass and return its output.

    Every non-``None`` argument must be acceptable to cuDNN, otherwise an
    ``Exception`` is raised.  The value returned by the cuDNN call is kept
    in ``self._cudnn_info`` for the backward pass.
    """
    # Assert we're using cudnn
    # NOTE(review): the message names _EfficientBatchNorm although this
    # method performs a convolution; confirm against the enclosing class.
    for tensor in (weight, bias, input):
        if tensor is None or cudnn.is_acceptable(tensor):
            continue
        raise Exception('You must be using CUDNN to use _EfficientBatchNorm')

    res = input.new(*self._output_size(input, weight))
    padding = (self.padding, self.padding)
    stride = (self.stride, self.stride)
    dilation = (self.dilation, self.dilation)
    self._cudnn_info = torch._C._cudnn_convolution_full_forward(
        input, weight, bias, res,
        padding, stride, dilation,
        self.groups, cudnn.benchmark)
    return res
def backward(self, weight, bias, input, grad_output):
    """Compute all gradients with cuDNN.

    Returns ``(grad_weight, grad_bias, grad_input)``; ``grad_bias`` is
    ``None`` when ``bias`` is ``None``.  Relies on ``self._cudnn_info``
    having been stored by the forward pass.
    """
    info = self._cudnn_info

    grad_input = input.new().resize_as_(input)
    torch._C._cudnn_convolution_backward_data(
        grad_output, grad_input, weight, info, cudnn.benchmark)

    grad_weight = weight.new().resize_as_(weight)
    torch._C._cudnn_convolution_backward_filter(
        grad_output, input, grad_weight, info, cudnn.benchmark)

    grad_bias = None
    if bias is not None:
        grad_bias = bias.new().resize_as_(bias)
        torch._C._cudnn_convolution_backward_bias(grad_output, grad_bias, info)

    return grad_weight, grad_bias, grad_input
def get_models():
    """Build the discriminator and generator from the global ``params``.

    Both networks get ``init_weights`` applied, are restored from their
    checkpoint paths when those exist, are moved to CUDA when available,
    and are printed.  Returns ``(D, G)``.
    """
    shared = dict(num_channels=params.num_channels,
                  image_size=params.image_size,
                  num_gpu=params.num_gpu,
                  num_extra_layers=params.num_extra_layers)
    # Batch norm is off for the discriminator and on for the generator.
    D = Discriminator(conv_dim=params.d_conv_dim, use_BN=False, **shared)
    G = Generator(z_dim=params.z_dim, conv_dim=params.g_conv_dim,
                  use_BN=True, **shared)

    # init weights of models
    for net in (D, G):
        net.apply(init_weights)

    # restore model weights
    for net, checkpoint in ((D, params.d_model_restore),
                            (G, params.g_model_restore)):
        if checkpoint is not None and os.path.exists(checkpoint):
            net.load_state_dict(torch.load(checkpoint))

    # check if cuda is available
    if torch.cuda.is_available():
        cudnn.benchmark = True
        D.cuda()
        G.cuda()

    # NOTE(review): nesting reconstructed from flattened source; the
    # printing is assumed to happen regardless of CUDA availability.
    print(D)
    print(G)

    return D, G
def get_models(num_channels, d_conv_dim, g_conv_dim, z_dim, num_gpu,
               d_model_restore=None, g_model_restore=None):
    """Build the discriminator and generator from explicit settings.

    Both networks get ``init_weights`` applied, are restored from their
    checkpoint paths when those are given and exist, and are moved to CUDA
    when available.  Returns ``(D, G)``.
    """
    D = Discriminator(num_channels=num_channels,
                      conv_dim=d_conv_dim,
                      num_gpu=num_gpu)
    G = Generator(num_channels=num_channels,
                  z_dim=z_dim,
                  conv_dim=g_conv_dim,
                  num_gpu=num_gpu)

    # init weights of models
    for net in (D, G):
        net.apply(init_weights)

    # restore model weights
    for net, checkpoint in ((D, d_model_restore), (G, g_model_restore)):
        if checkpoint is not None and os.path.exists(checkpoint):
            net.load_state_dict(torch.load(checkpoint))

    # check if cuda is available
    if torch.cuda.is_available():
        cudnn.benchmark = True
        D.cuda()
        G.cuda()

    return D, G
def get_models():
    """Build the discriminator and generator from the global ``params``.

    Both networks get ``init_weights`` applied, are restored from their
    checkpoint paths when those exist, are moved to CUDA when available,
    and are printed.  Returns ``(D, G)``.
    """
    shared = dict(num_channels=params.num_channels,
                  image_size=params.image_size,
                  num_gpu=params.num_gpu,
                  num_extra_layers=params.num_extra_layers)
    # The discriminator always uses batch norm; the generator follows params.
    D = Discriminator(conv_dim=params.d_conv_dim, use_BN=True, **shared)
    G = Generator(z_dim=params.z_dim, conv_dim=params.g_conv_dim,
                  use_BN=params.use_BN, **shared)

    # init weights of models
    for net in (D, G):
        net.apply(init_weights)

    # restore model weights
    for net, checkpoint in ((D, params.d_model_restore),
                            (G, params.g_model_restore)):
        if checkpoint is not None and os.path.exists(checkpoint):
            net.load_state_dict(torch.load(checkpoint))

    # check if cuda is available
    if torch.cuda.is_available():
        cudnn.benchmark = True
        D.cuda()
        G.cuda()

    # NOTE(review): nesting reconstructed from flattened source; the
    # printing is assumed to happen regardless of CUDA availability.
    print(D)
    print(G)

    return D, G
def setup(opt):
'''
Setups cudnn, seeds and parses updates string.
'''
opt.cuda = not opt.cpu
torch.set_num_threads(4)
if opt.nc is None:
opt.nc = 1 if opt.dataset == 'mnist' else 3
try:
os.makedirs(opt.save_dir)
except OSError:
print('Directory was not created.')
if opt.manual_seed is None:
opt.manual_seed = random.randint(1, 10000)
print("Random Seed: ", opt.manual_seed)
random.seed(opt.manual_seed)
torch.manual_seed(opt.manual_seed)
torch.cuda.manual_seed_all(opt.manual_seed)
cudnn.benchmark = True
if torch.cuda.is_available() and not opt.cuda:
print("WARNING: You have a CUDA device,"
"so you should probably run with --cuda")
updates = {'e': {}, 'g': {}}
updates['e']['num_updates'] = int(opt.e_updates.split(';')[0])
updates['e'].update({x.split(':')[0]: float(x.split(':')[1])
for x in opt.e_updates.split(';')[1].split(',')})
updates['g']['num_updates'] = int(opt.g_updates.split(';')[0])
updates['g'].update({x.split(':')[0]: float(x.split(':')[1])
for x in opt.g_updates.split(';')[1].split(',')})
return updates
def build_experiment(self, batch_size, classes_per_set, samples_per_class, channels, fce):
    """
    Build the matching network and store the training settings on ``self``.

    :param batch_size: The experiment batch size
    :param classes_per_set: An integer indicating the number of classes per support set
    :param samples_per_class: An integer indicating the number of samples per class
    :param channels: The image channels
    :param fce: Whether to use full context embeddings or not
    :return: None; the network and the settings are stored as attributes
    """
    self.classes_per_set = classes_per_set
    self.samples_per_class = samples_per_class
    # NOTE(review): torch.FloatTensor(1) presumably allocates one element
    # without initialising it; confirm the network sets keep_prob before use.
    self.keep_prob = torch.FloatTensor(1)

    network = MatchingNetwork(batch_size=batch_size,
                              keep_prob=self.keep_prob,
                              num_channels=channels,
                              fce=fce,
                              num_classes_per_set=classes_per_set,
                              num_samples_per_class=samples_per_class,
                              nClasses=0,
                              image_size=28)
    self.matchingNet = network

    # Optimisation settings.
    self.optimizer = 'adam'
    self.lr = self.current_lr = 1e-03
    self.lr_decay = 1e-6
    self.wd = 1e-4
    self.total_train_iter = 0

    self.isCudaAvailable = torch.cuda.is_available()
    if self.isCudaAvailable:
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(0)
        self.matchingNet.cuda()
def setup(opt, checkpoint):
model = None
if checkpoint != None:
modelPath = os.path.join(opt.resume, checkpoint['modelFile'])
assert os.path.exists(modelPath), 'Saved model not found: '+modelPath
print('=> Resuming model from ' + modelPath)
model = torch.load(modelPath)
else:
print('=> Creating new model')
models = importlib.import_module('models.' + opt.netType)
model = models.createModel(opt)
if isinstance(model, nn.DataParallel):
model = model.get(0)
if opt.resetClassifier and not checkpoint:
pass
#TODO
if opt.cudnn == 'fastest':
cudnn.fastest = True
cudnn.benchmark = True
elif opt.cudnn == 'deterministic':
cudnn.fastest = False
cudnn.benchmark = False
#TODO
if opt.nGPUs > 1:
gpus = opt.GPUs
fastest, benchmark = cudnn.fastest, cudnn.benchmark
# TODO make a dataparallel to split data on different GPUs
optimState = None
if checkpoint != None:
optimPath = os.path.join(opt.resume, checkpoint['optimFile'])
assert os.path.exists(optimPath), 'Saved optimState not found: ' + optimPath
print('=> Resuming optimState from ' + optimPath)
optimState = torch.load(optimPath)
return model, optimState
def main():
    """Run every loader image through the network and store the features.

    Writes two datasets into the HDF5 file at ``config.preprocessed_path``:
    ``features`` (float16) and ``ids`` (int32), filled batch by batch in
    loader order.  Requires a CUDA device and a PyTorch version that
    provides ``torch.no_grad`` and the ``non_blocking`` keyword.
    """
    import torch  # local import: this block newly needs torch.no_grad

    cudnn.benchmark = True

    net = Net().cuda()
    net.eval()

    loader = create_coco_loader(config.train_path, config.val_path)
    features_shape = (
        len(loader.dataset),
        config.output_features,
        config.output_size,
        config.output_size
    )

    # h5py 3+ opens files read-only by default; 'a' (the previous default)
    # keeps the file writable.
    with h5py.File(config.preprocessed_path, 'a', libver='latest') as fd:
        features = fd.create_dataset('features', shape=features_shape, dtype='float16')
        coco_ids = fd.create_dataset('ids', shape=(len(loader.dataset),), dtype='int32')

        i = j = 0
        # Inference only: no_grad() replaces Variable(..., volatile=True).
        with torch.no_grad():
            for ids, imgs in tqdm(loader):
                # `async` is a reserved word since Python 3.7; the keyword
                # argument is now called `non_blocking`.
                imgs = imgs.cuda(non_blocking=True)
                out = net(imgs)

                # Rows [i, j) of both datasets belong to this batch.
                j = i + imgs.size(0)
                features[i:j, :, :] = out.cpu().numpy().astype('float16')
                coco_ids[i:j] = ids.numpy().astype('int32')
                i = j
def main():
    """Train and validate for ``config.epochs`` epochs, saving after each one.

    The run name is the joined command-line arguments, or a timestamp when
    none are given; results are written to ``logs/<name>.pth``.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        name = ' '.join(cli_args)
    else:
        from datetime import datetime
        name = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    target_name = os.path.join('logs', '{}.pth'.format(name))
    print('will save to {}'.format(target_name))

    cudnn.benchmark = True

    train_loader = data.get_loader(train=True)
    val_loader = data.get_loader(val=True)

    net = nn.DataParallel(model.Net(train_loader.dataset.num_tokens)).cuda()
    # Only parameters that require gradients are handed to the optimizer.
    trainable = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable)

    tracker = utils.Tracker()
    config_as_dict = {k: v for k, v in vars(config).items() if not k.startswith('__')}

    for epoch in range(config.epochs):
        run(net, train_loader, optimizer, tracker, train=True, prefix='train', epoch=epoch)
        r = run(net, val_loader, optimizer, tracker, train=False, prefix='val', epoch=epoch)

        # The same target file is overwritten after every epoch.
        evaluation = {
            'answers': r[0],
            'accuracies': r[1],
            'idx': r[2],
        }
        results = {
            'name': name,
            'tracker': tracker.to_dict(),
            'config': config_as_dict,
            'weights': net.state_dict(),
            'eval': evaluation,
            'vocab': train_loader.dataset.vocab,
        }
        torch.save(results, target_name)
def __init__(self, opt):
    """Build the image and text encoders, the loss and the Adam optimizer.

    The optimizer covers the text encoder and the image encoder's ``fc``
    parameters, plus the image encoder's ``cnn`` parameters when
    ``opt.finetune`` is set.
    """
    # tutorials/09 - Image Captioning
    # Build Models
    self.grad_clip = opt.grad_clip
    self.img_enc = EncoderImage(opt.data_name, opt.img_dim, opt.embed_size,
                                opt.finetune, opt.cnn_type,
                                use_abs=opt.use_abs,
                                no_imgnorm=opt.no_imgnorm)
    self.txt_enc = EncoderText(opt.vocab_size, opt.word_dim,
                               opt.embed_size, opt.num_layers,
                               use_abs=opt.use_abs)

    if torch.cuda.is_available():
        for encoder in (self.img_enc, self.txt_enc):
            encoder.cuda()
        cudnn.benchmark = True

    # Loss and Optimizer
    self.criterion = ContrastiveLoss(margin=opt.margin,
                                     measure=opt.measure,
                                     max_violation=opt.max_violation)

    params = list(self.txt_enc.parameters()) + list(self.img_enc.fc.parameters())
    if opt.finetune:
        params.extend(self.img_enc.cnn.parameters())
    self.params = params

    self.optimizer = torch.optim.Adam(params, lr=opt.learning_rate)

    self.Eiters = 0
def backward(self, grad_output):
    """Compute the gradients for the tensors saved by the forward pass.

    Returns ``(grad_input, grad_weight)`` when no bias was saved, otherwise
    ``(grad_input, grad_weight, grad_bias)``.  An entry stays ``None`` when
    the matching ``needs_input_grad`` flag is not set.
    """
    saved = self.saved_tensors
    if len(saved) == 2:
        (input, weight), bias = saved, None
    else:
        input, weight, bias = saved

    grad_input = grad_weight = grad_bias = None

    if cudnn.is_acceptable(input):
        info = self._cudnn_info

        if self.needs_input_grad[0]:
            grad_input = input.new().resize_as_(input)
            torch._C._cudnn_convolution_backward_data(
                grad_output, grad_input, weight, info, cudnn.benchmark)

        if self.needs_input_grad[1]:
            grad_weight = weight.new().resize_as_(weight)
            torch._C._cudnn_convolution_backward_filter(
                grad_output, input, grad_weight, info, cudnn.benchmark)

        if bias is not None and self.needs_input_grad[2]:
            grad_bias = bias.new().resize_as_(bias)
            torch._C._cudnn_convolution_backward_bias(
                grad_output, grad_bias, info)
    else:
        backend = type2backend[type(input)]

        if self.needs_input_grad[0]:
            grad_input = input.new().resize_as_(input).zero_()
            backend.SpatialConvolutionMM_updateGradInput(
                backend.library_state, input, grad_output, grad_input,
                weight, self._finput, self._fgrad_input,
                weight.size(3), weight.size(2),
                self.stride[1], self.stride[0],
                self.pad[1], self.pad[0])

        # Weight and bias gradients come from a single THNN call, so the
        # weight buffer is allocated when either of them is requested.
        if any(self.needs_input_grad[1:]):
            grad_weight = weight.new().resize_as_(weight).zero_()
            if bias is not None and self.needs_input_grad[2]:
                grad_bias = bias.new().resize_as_(bias).zero_()
            backend.SpatialConvolutionMM_accGradParameters(
                backend.library_state, input, grad_output, grad_weight,
                grad_bias, self._finput, self._fgrad_input,
                weight.size(3), weight.size(2),
                self.stride[1], self.stride[0],
                self.pad[1], self.pad[0], 1)

    if bias is None:
        return grad_input, grad_weight
    return grad_input, grad_weight, grad_bias