# Module-level imports used by this chainer.Optimizer method.
import numpy

from chainer import cuda


def accumulate_grads(self, grads):
"""Accumulates gradients from other source.
This method just adds given gradient arrays to gradients that this
optimizer holds. It is typically used in data-parallel optimization,
where gradients for different shards are computed in parallel and
aggregated by this method. This method correctly treats multiple GPU
devices.
Args:
grads (Iterable): Iterable of gradient arrays to be accumulated.
.. deprecated:: v1.5
Use the :meth:`chainer.Link.addgrads` method of the target link
instead.
"""
    for param, g_src in zip(self.target.params(), grads):
        g_dst = param.grad
        # CPU parameter: bring the source gradient to host memory and add.
        if isinstance(g_dst, numpy.ndarray):
            g_dst += cuda.to_cpu(g_src)
            continue

        # GPU parameter: perform the addition on the device holding g_dst,
        # copying the source gradient across devices when necessary.
        with cuda.get_device(g_dst):
            if (isinstance(g_src, cuda.ndarray) and
                    g_dst.device != g_src.device):
                g_dst += cuda.copy(g_src, out_device=g_dst.device)
            else:
                g_dst += cuda.to_gpu(g_src)
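

# ----------------------------------------------------------------------
# Hedged usage sketch (not part of the original source).  It assumes a
# Chainer v1.x installation, where ``Optimizer.accumulate_grads`` still
# exists; the link, optimizer, and ``shard_grads`` below are illustrative
# stand-ins for whatever a real data-parallel setup would produce.
import numpy

import chainer.links as L
from chainer import optimizers

model = L.Linear(3, 2)            # target link shared across shards
optimizer = optimizers.SGD()
optimizer.setup(model)

model.zerograds()                 # make sure param.grad arrays exist

# Gradients computed by another shard, one array per parameter and in the
# same order as ``model.params()`` (all-ones here, purely for illustration).
shard_grads = [numpy.ones_like(p.data) for p in model.params()]

optimizer.accumulate_grads(shard_grads)   # deprecated since v1.5
optimizer.update()                        # apply the accumulated gradients

# Preferred replacement since v1.5: accumulate through the link itself,
# e.g. ``model.addgrads(shard_model)``, where ``shard_model`` is the copy
# of the link held by the other shard.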