def set_by_sample(self, train=True):
xp = self.xp
use_gpu = (xp == cuda.cupy)
for i in range(self.num_layers):
# h
mu, sigma = self.hmus[i], self.hsigmas[i]
e = np.random.normal(0., 1., self.z_size).astype(np.float32)
if use_gpu:
e = cuda.to_gpu(e)
self.decoder.set_h(i, self.get_zh(i)(mu + e * sigma))
# c
mu, sigma = self.cmus[i], self.csigmas[i]
e = np.random.normal(0., 1., self.z_size).astype(np.float32)
if use_gpu:
e = cuda.to_gpu(e)
self.decoder.set_c(i, self.get_zc(i)(mu + e * sigma))
Example source code for the Python to_gpu() method
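As a minimal orientation sketch (assuming CuPy and a CUDA device 0 are available): cuda.to_gpu copies a NumPy array into device memory and cuda.to_cpu copies it back.
import numpy as np
from chainer import cuda

x = np.arange(6, dtype=np.float32).reshape(2, 3)
x_gpu = cuda.to_gpu(x, device=0)   # cupy.ndarray on device 0
x_cpu = cuda.to_cpu(x_gpu)         # back to a numpy.ndarray
assert np.array_equal(x, x_cpu)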
def get_model_and_optimizer(result_dir, modelfn, opt, opt_kwargs, net_kwargs, gpu):
model_fn = os.path.basename(modelfn)
model_name = model_fn.split('.')[0]
module = imp.load_source(model_name, modelfn)
net = getattr(module, model_name)
# Copy model definition and this train script to the result dir
dst = '%s/%s' % (result_dir, model_fn)
if not os.path.exists(dst):
shutil.copy(modelfn, dst)
dst = '%s/%s' % (result_dir, os.path.basename(__file__))
if not os.path.exists(dst):
shutil.copy(__file__, dst)
# Create model
model = net(**net_kwargs)
if gpu >= 0:
model.to_gpu(gpu)
# Create optimizer
optimizer = optimizers.__dict__[opt](**opt_kwargs)
optimizer.setup(model)
return model, optimizer
def get_model_and_optimizer(result_dir, modelfn, opt, opt_kwargs, net_kwargs, gpu):
model_fn = os.path.basename(modelfn)
model_name = model_fn.split('.')[0]
module = imp.load_source(model_name, modelfn)
Net = getattr(module, model_name)
dst = '%s/%s' % (result_dir, model_fn)
if not os.path.exists(dst):
shutil.copy(modelfn, dst)
dst = '%s/%s' % (result_dir, os.path.basename(__file__))
if not os.path.exists(dst):
shutil.copy(__file__, dst)
# prepare model
model = Net(**net_kwargs)
if gpu >= 0:
model.to_gpu()
optimizer = optimizers.__dict__[opt](**opt_kwargs)
optimizer.setup(model)
return model, optimizer
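A hypothetical call sketch for either variant above (the file name, optimizer settings, and network kwargs are illustrative only): the helper imports the class named after the model file, snapshots the definition into the result directory, and returns the model together with a configured optimizer.
# result_dir must already exist; the model file and this script are copied into it.
model, optimizer = get_model_and_optimizer(
    result_dir='results/run0',
    modelfn='models/VGGNet.py',      # must define a class named VGGNet
    opt='Adam',
    opt_kwargs={'alpha': 1e-3},
    net_kwargs={'n_class': 10},
    gpu=0)                           # gpu < 0 keeps the model on the CPU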
def test_index_group_func():
import numpy as np
import cupy as cp
from chainer import cuda
input = np.random.randn(2, 3, 4, 5, 6)
I = np.random.randint(0, 4, (7, 8, 9, 10))
J = np.random.randint(0, 5, (7, 8, 9, 10))
K = np.random.randint(0, 6, (7, 8, 9, 10))
output = input[..., I, J, K].swapaxes(1, 2)
cpoutput = cp.zeros(output.shape)
cpinput = cuda.to_gpu(input)
cpI = cuda.to_gpu(I)
cpJ = cuda.to_gpu(J)
cpK = cuda.to_gpu(K)
index_group_func_kernel(cpinput, cpI, cpJ, cpK, cpoutput)
cpoutput = cuda.to_cpu(cpoutput)
error = np.abs(cpoutput - output).sum()
print(error)
assert np.isclose(error, 0.)
def check_transform_grad(inds, w, transformer, dtype, toll):
from chainer import gradient_check
inds = cuda.to_gpu(inds)
W = Variable(w.astype(dtype))
R = transformer(inds)
RW = R(W)
RW.grad = cp.random.randn(*RW.data.shape).astype(dtype)
RW.backward(retain_grad=True)
func = RW.creator
fn = lambda: func.forward((W.data,))
gW, = gradient_check.numerical_grad(fn, (W.data,), (RW.grad,))
gan = cuda.to_cpu(gW)
gat = cuda.to_cpu(W.grad)
relerr = np.max(np.abs(gan - gat) / np.maximum(np.abs(gan), np.abs(gat)))
print (dtype, toll, relerr)
assert relerr < toll
def check_equivariance(im, layers, input_array, output_array, point_group):
# Transform the image
f = input_array(im)
g = point_group.rand()
gf = g * f
im1 = gf.v
# Apply layers to both images
im = Variable(cuda.to_gpu(im))
im1 = Variable(cuda.to_gpu(im1))
fmap = im
fmap1 = im1
for layer in layers:
layer.to_gpu()
fmap = layer(fmap)
fmap1 = layer(fmap1)
# Transform the computed feature maps
fmap1_garray = output_array(cuda.to_cpu(fmap1.data))
r_fmap1_data = (g.inv() * fmap1_garray).v
fmap_data = cuda.to_cpu(fmap.data)
assert np.allclose(fmap_data, r_fmap1_data, rtol=1e-5, atol=1e-3)
def concat_examples(batch, device=None):
if len(batch) == 0:
raise ValueError('batch is empty')
if device is None:
def to_device(x):
return x
elif device < 0:
to_device = cuda.to_cpu
else:
def to_device(x):
return cuda.to_gpu(x, device, cuda.Stream.null)
result = [to_device(_concat_arrays([s[0] for s in batch], -1)), # ws
to_device(_concat_arrays([s[1] for s in batch], -1)), # ps
to_device(_concat_arrays([s[2] for s in batch], -1)), # ss
[s[3] for s in batch]] # ls
if len(batch[0]) == 7:
result.append([to_device(s[4]) for s in batch]) # cat_ts
result.append([to_device(s[5]) for s in batch]) # dep_ts
result.append(to_device(_concat_arrays([s[6] for s in batch], None))) # weights
return tuple(result)
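A hypothetical usage sketch (train_iter is assumed to be a Chainer iterator over such samples): the first three entries of every sample (ws, ps, ss) are padded and moved to the target device, the fourth (ls) is passed through unchanged, and 7-element samples additionally yield cat_ts, dep_ts and weights.
batch = next(train_iter)
ws, ps, ss, ls = concat_examples(batch, device=0)   # 4-element samples
# ws, ps, ss, ls, cat_ts, dep_ts, weights = concat_examples(batch, device=0)   # 7-element samples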
# From nutszebra_ilsvrc_object_localization_with_multi_gpus.py (project: trainer, author: nutszebra)
def setup_workers(self):
# work only once
if self._initialized:
return
self._initialized = True
self.model.cleargrads()
for i in six.moves.range(1, len(self.gpus)):
pipe, worker_end = multiprocessing.Pipe()
worker = _Worker(i, worker_end, self.model, self.gpus, self.da, int(float(self.batch) / len(self.gpus) / self.train_batch_divide), self)
worker.start()
self._workers.append(worker)
self._pipes.append(pipe)
with cuda.Device(self.gpus[0]):
self.model.to_gpu(self.gpus[0])
if len(self.gpus) > 1:
communication_id = nccl.get_unique_id()
self._send_message(("set comm_id", communication_id))
self.communication = nccl.NcclCommunicator(len(self.gpus),
communication_id,
0)
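The master process broadcasts the freshly generated NCCL communication_id to the worker processes (which take ranks 1 through N-1) and then joins the same communicator itself as rank 0. The variant below differs mainly in clearing gradients with the older zerograds() API instead of cleargrads().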
def setup_workers(self):
# work only once
if self._initialized:
return
self._initialized = True
self.model.zerograds()
for i in six.moves.range(1, len(self.gpus)):
pipe, worker_end = multiprocessing.Pipe()
worker = _Worker(i, worker_end, self.model, self.gpus, self.da, int(self.batch / len(self.gpus) / self.train_batch_divide), self)
worker.start()
self._workers.append(worker)
self._pipes.append(pipe)
with cuda.Device(self.gpus[0]):
self.model.to_gpu(self.gpus[0])
if len(self.gpus) > 1:
communication_id = nccl.get_unique_id()
self._send_message(("set comm_id", communication_id))
self.communication = nccl.NcclCommunicator(len(self.gpus),
communication_id,
0)
def features_to_minibatch(self, features, sentences, max_feature_length, max_sentence_length, gpu=True):
x_batch, x_length_batch, t_batch, t_length_batch, bigram_batch = self.processor.features_to_minibatch(features, sentences, max_feature_length, max_sentence_length, self.token_ids,
self.id_blank)
if self.stats_total > 0:
for x, length in zip(x_batch, x_length_batch):
self._update_stats_recursively(x[..., :length])
x_mean, x_std = self.get_mean_and_std()
x_batch = (x_batch - x_mean) / x_std
if gpu:
x_batch = cuda.to_gpu(x_batch.astype(np.float32))
t_batch = cuda.to_gpu(t_batch.astype(np.int32))
bigram_batch = cuda.to_gpu(bigram_batch.astype(np.int32))
x_length_batch = cuda.to_gpu(np.asarray(x_length_batch).astype(np.int32))
t_length_batch = cuda.to_gpu(np.asarray(t_length_batch).astype(np.int32))
return x_batch, x_length_batch, t_batch, t_length_batch, bigram_batch
def __init__(self, d, f, R, gpu):
self.d = d
self.f = f
self.R = R
self.gpu = gpu
g = ChainList(*[L.Linear(1, f) for i in six.moves.range(AtomIdMax)])
H = ChainList(*[L.Linear(f, f) for i in six.moves.range(R)])
W = ChainList(*[L.Linear(f, d) for i in six.moves.range(R + 1)])
self.optimizer = optimizers.Adam()
self.model = Chain(H=H, W=W, g=g)
if gpu:
self.model.to_gpu(0)
self.optimizer.setup(self.model)
self.to = [[] for i in six.moves.range(2)]
self.atom_sid = [[] for i in six.moves.range(2)]
self.anum = [[] for i in six.moves.range(2)]
def test_forward_gpu(self):
x = chainer.Variable(self.x)
t = chainer.Variable(self.t)
y = self.link(x, t)
self.assertEqual(y.data.dtype, numpy.float32)
self.assertEqual(y.data.shape, ())
# fix samples
negative_sampling.NegativeSamplingFunction.samples = cuda.to_gpu(
y.creator.samples)
self.link.to_gpu()
y_g = self.link(chainer.Variable(cuda.to_gpu(self.x)),
chainer.Variable(cuda.to_gpu(self.t)))
del negative_sampling.NegativeSamplingFunction.samples
self.assertEqual(y_g.data.dtype, numpy.float32)
self.assertEqual(y_g.data.shape, ())
gradient_check.assert_allclose(y.data, y_g.data, atol=1.e-4)
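Assigning the CPU run's negative samples (copied to the GPU) to NegativeSamplingFunction.samples forces the GPU pass to draw identical negative samples, which is what makes the final element-wise comparison of the two outputs meaningful.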
def __init__(self, n_history, n_action, on_gpu=False):
self.n_history = n_history
self.n_action = n_action
self.on_gpu = on_gpu
super(Q, self).__init__(
l1=F.Convolution2D(n_history, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
l2=F.Convolution2D(32, 64, ksize=3, stride=2, nobias=False, wscale=np.sqrt(2)),
l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
out=F.Linear(512, self.n_action, initialW=np.zeros((n_action, 512), dtype=np.float32))
)
if on_gpu:
self.to_gpu()
def arr_to_gpu(self, arr):
return arr if not self.on_gpu else cuda.to_gpu(arr)
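A hypothetical construction sketch: arr_to_gpu is a no-op when on_gpu is False and otherwise defers to cuda.to_gpu; the 84x84 input shape below is illustrative, though it is consistent with the 3136-unit (64*7*7) linear layer in Q.
q = Q(n_history=4, n_action=6, on_gpu=False)
state = np.zeros((1, 4, 84, 84), dtype=np.float32)   # illustrative batch of stacked frames
state = q.arr_to_gpu(state)                          # returned unchanged when on_gpu=False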
def gan_sampling(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128):
@chainer.training.make_extension()
def samples_generation(trainer):
if not os.path.exists(eval_folder):
os.makedirs(eval_folder)
z = np.random.normal(size=(rows*cols, latent_len)).astype("f")
if gpu>=0:
z = cuda.to_gpu(z)
z = Variable(z, volatile=True)
imgs = gen(z, test=True)
save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".jpg",
grid_w=rows, grid_h=cols)
return samples_generation
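gan_sampling returns a chainer.training extension, so a hypothetical registration (trainer and gen are assumed to already exist) looks like:
trainer.extend(gan_sampling(gen, eval_folder='samples', gpu=0),
               trigger=(1000, 'iteration'))   # dump a rows x cols sample grid every 1000 iterations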
def gan_sampling_tags(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128, attr_len=38, threshold=0.25):
@chainer.training.make_extension()
def get_fake_tag():
prob2 = np.random.rand(attr_len)
tags = np.zeros((attr_len)).astype("f")
tags[:] = -1.0
tags[np.argmax(prob2[0:13])]=1.0
tags[27 + np.argmax(prob2[27:])] = 1.0
prob2[prob2<threshold] = -1.0
prob2[prob2>=threshold] = 1.0
for i in range(13, 27):
tags[i] = prob2[i]
return tags
def get_fake_tag_batch():
xp = gen.xp
batch = rows*cols
tags = xp.zeros((batch, attr_len)).astype("f")
for i in range(batch):
tags[i] = xp.asarray(get_fake_tag())
return tags
def samples_generation(trainer):
if not os.path.exists(eval_folder):
os.makedirs(eval_folder)
z = np.random.normal(size=(rows*cols, latent_len)).astype("f")
if gpu>=0:
z = cuda.to_gpu(z)
tags =get_fake_tag_batch()
z = Variable(z, volatile=True)
tags = Variable(tags, volatile=True)
imgs = gen(F.concat([z,tags]), test=True)
save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".jpg",
grid_w=rows, grid_h=cols)
return samples_generation
def to_gpu(self):
super(Encoder, self).to_gpu()
for i in range(self.num_layers):
h = self.get_h(i)
c = self.get_c(i)
if h is not None: h.to_gpu()
if c is not None: c.to_gpu()
def to_gpu(self):
super(RNNLM, self).to_gpu()
for i in range(self.num_layers):
h = self.get_h(i)
c = self.get_c(i)
if h is not None: h.to_gpu()
if c is not None: c.to_gpu()
def to_gpu(self):
super(VAELM, self).to_gpu()
self.encoder.to_gpu()
self.decoder.to_gpu()
for i in range(self.num_layers):
self.get_htrans(i).to_gpu()
self.get_ctrans(i).to_gpu()
def train(model, batch, num_samples, word_keep_rate, UNK, alpha):
xp = model.xp
use_gpu = (xp == cuda.cupy)
if use_gpu:
batch = cuda.to_gpu(batch)
KL, xents = forward(model, batch, num_samples=num_samples, word_keep_rate=word_keep_rate, UNK=UNK, train=True)
loss = alpha * KL + sum(xents) / num_samples
loss.backward()
optimizer.update()
loss.unchain_backward()
if alpha == 0: KL.unchain_backward()
def batch_to_vars(batch, device=-1):
import chainer
from chainer import cuda
in_arrays = [np.asarray(x) for x in zip(*batch)]
if device >= 0:
in_arrays = [cuda.to_gpu(x, device=device) for x in in_arrays]
in_vars = [chainer.Variable(x) for x in in_arrays]
return in_vars
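A hypothetical usage sketch (img0, label0, ... are illustrative arrays): samples are zipped column-wise, stacked with np.asarray, optionally moved to the GPU, and wrapped in Variables.
batch = [(img0, label0), (img1, label1)]
imgs, labels = batch_to_vars(batch, device=0)   # device=-1 keeps everything on the CPU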
def _diagonal_idx_array(batch_size, n):
idx_offsets = np.arange(
start=0, stop=batch_size * n * n, step=n * n, dtype=np.int32).reshape(
(batch_size, 1))
idx = np.ravel_multi_index(
np.diag_indices(n), (n, n)).reshape((1, n)).astype(np.int32)
return cuda.to_gpu(idx + idx_offsets)
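A small sketch of what the returned indices select (assuming a batch of n x n matrices flattened into one GPU vector): taking the indices recovers every matrix's diagonal.
idx = _diagonal_idx_array(batch_size=2, n=3)                 # [[0, 4, 8], [9, 13, 17]] on the GPU
flat = cuda.to_gpu(np.arange(2 * 3 * 3, dtype=np.float32))   # two flattened 3x3 matrices
diags = flat.take(idx)                                       # shape (2, 3): the two diagonals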
def _non_diagonal_idx_array(batch_size, n):
idx_offsets = np.arange(
start=0, stop=batch_size * n * n, step=n * n, dtype=np.int32).reshape(
(batch_size, 1))
idx = np.ravel_multi_index(
np.tril_indices(n, -1), (n, n)).reshape((1, -1)).astype(np.int32)
return cuda.to_gpu(idx + idx_offsets)
def test_forward_gpu(self):
self.check_forward(cuda.to_gpu(self.x))
def test_forward_gpu(self):
xs_gpu = [chainer.cuda.to_gpu(x) for x in self.xs]
self.check_forward(xs_gpu)
def test_backward_gpu(self):
xs_gpu = [chainer.cuda.to_gpu(x) for x in self.xs]
self.check_backward(xs_gpu, cuda.to_gpu(self.gy))
def test_forward_gpu(self):
self.check_forward(cuda.to_gpu(self.x))
def test_forward_gpu(self):
self.check_forward(cuda.to_gpu(self.diag), cuda.to_gpu(self.non_diag))
def test_backward_gpu(self):
self.check_backward((cuda.to_gpu(self.diag), cuda.to_gpu(
self.non_diag)), cuda.to_gpu(self.gy))
def test_forward_gpu(self):
xs_gpu = [chainer.cuda.to_gpu(x) for x in self.xs]
self.check_forward(xs_gpu)