def train(model, batch, num_samples, word_keep_rate, UNK, alpha):
    xp = model.xp
    use_gpu = (xp == cuda.cupy)
    if use_gpu:
        batch = cuda.to_gpu(batch)
    KL, xents = forward(model, batch, num_samples=num_samples,
                        word_keep_rate=word_keep_rate, UNK=UNK, train=True)
    loss = alpha * KL + sum(xents) / num_samples
    loss.backward()
    optimizer.update()  # `optimizer` is assumed to be set up at module level
    loss.unchain_backward()
    if alpha == 0: KL.unchain_backward()
def save_hdf5(filename, obj):
    gpu = (hasattr(obj, "xp") and obj.xp == cuda.cupy)
    if gpu: obj.to_cpu()
    serializers.save_hdf5(filename, obj)
    if gpu: obj.to_gpu()
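
# --- Usage sketch (not from the original source): how the save_hdf5 helper
# above can be used with a Chainer link. `model` here is an illustrative
# L.Linear; any Chain/Link with to_cpu()/to_gpu() works the same way.
import chainer.links as L
from chainer import cuda, serializers

model = L.Linear(784, 10)
if cuda.available:
    model.to_gpu()                          # helper moves it back after saving
save_hdf5("model.hdf5", model)              # serialized from a CPU copy
serializers.load_hdf5("model.hdf5", model)  # reload parameters in place
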
def make_batch(datas, train=True):
    # `args` and `xp` are assumed to be module-level: an argparse namespace
    # and the numpy/cupy module selected from args.gpu.
    allconcat = np.concatenate(datas, axis=0)
    if args.gpu >= 0:
        allconcat = cuda.cupy.array(allconcat)
    batch = xp.split(allconcat, allconcat.shape[1], axis=1)
    batch = [xp.reshape(x, (x.shape[0], x.shape[2])) for x in batch]
    return batch
def forward(self, inputs):
    xp = cuda.get_array_module(*inputs)
    x0, x1 = inputs
    self.diff = self.inside_weights * (x0 - x1)
    abs_diff = xp.abs(self.diff)
    flag = abs_diff < 1.0 / self.sigma2
    y = (flag * 0.5 * xp.square(self.diff) * self.sigma2 +
         (~flag) * (abs_diff - 0.5 / self.sigma2))
    if xp == cuda.cupy:
        with cuda.Device(cuda.get_device(y)):
            num = xp.prod(xp.asarray(y.shape))
    else:
        num = xp.prod(y.shape)
    return xp.array(y.sum() / num).astype(numpy.float32),
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = log_softmax._log_softmax(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == -1 ? 0 : coeff[0] * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit)
    return gx, None
def softmax_cross_entropy(
        x, t, use_cudnn=True, normalize=True, cache_score=True,
        class_weight=None):
    """Computes cross entropy loss for pre-softmax activations.

    Args:
        x (~chainer.Variable): Variable holding a multidimensional array whose
            element indicates unnormalized log probability: the first axis of
            the variable represents the number of samples, and the second axis
            represents the number of classes. While this function computes
            a usual softmax cross entropy if the number of dimensions is equal
            to 2, it computes a cross entropy of the replicated softmax if the
            number of dimensions is greater than 2.
        t (~chainer.Variable): Variable holding an int32 vector of ground
            truth labels. If ``t[i] == -1``, the corresponding ``x[i]`` is
            ignored.
        normalize (bool): If ``True``, this function normalizes the cross
            entropy loss across all instances. If ``False``, it only
            normalizes by the batch size.
        cache_score (bool): When ``True``, the function stores the result of
            the forward computation and reuses it on the backward computation.
            This reduces computational cost at the expense of extra memory.
        class_weight (~numpy.ndarray or ~cupy.ndarray): An array of constant
            weights that are multiplied with the loss values along the second
            dimension. The shape of this array should be ``(x.shape[1],)``.

    Returns:
        Variable: A variable holding a scalar array of the cross entropy loss.

    .. note::

       This function is differentiable only by ``x``.

    """
    return SoftmaxCrossEntropy(
        use_cudnn, normalize, cache_score, class_weight)(x, t)
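
# --- Usage sketch (not part of the original snippet): calling the wrapper
# above on a toy batch. Shapes and values are illustrative only; as the
# docstring says, label -1 is ignored.
import numpy as np
from chainer import Variable

x = Variable(np.random.randn(4, 3).astype(np.float32))  # 4 samples, 3 classes
t = np.array([0, 2, 1, -1], dtype=np.int32)              # last sample ignored
loss = softmax_cross_entropy(x, t)
print(float(loss.data))                                   # scalar float32 loss
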
def forward_gpu(self, x):
    xp = cuda.cupy
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
    up_y = conv.im2col_gpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = up_y.shape
    indexes = xp.asarray(self.indexes, dtype=numpy.int32)
    xp.ElementwiseKernel(
        'int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox,'
        'int32 ky, int32 kx', 'raw float32 up_y',
        '''
        int yn = i / c / oy / ox;
        int yc = (i / oy / ox) % c;
        int yoy = (i / ox) % oy;
        int yox = i % ox;
        up_y[yn * c * oy * ox * ky * kx +
             yc * oy * ox * ky * kx +
             yoy * ox * ky * kx +
             yox * ky * kx +
             index] = x;
        ''',
        'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def backward_gpu(self, x, gy):
    xp = cuda.cupy
    gcol = conv.im2col_gpu(
        gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    gcol = gcol.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = gcol.shape
    gcol = gcol.reshape((n, c, oy, ox, ky * kx))
    indexes = xp.asarray(self.indexes, dtype=numpy.int32)
    gx = xp.empty((n, c, oy, ox), dtype=x[0].dtype)
    xp.ElementwiseKernel(
        'int32 indexes, raw float32 gcol, int32 n, int32 c, int32 oy,'
        'int32 ox, int32 ky, int32 kx',
        'raw float32 gx',
        '''
        int ind_n = i / c / oy / ox;
        int ind_c = (i / oy / ox) % c;
        int ind_oy = (i / ox) % oy;
        int ind_ox = i % ox;
        int gcol_ky = indexes / kx;
        int gcol_kx = indexes % kx;
        float top_gx = gcol[ind_n * c * oy * ox * ky * kx +
                            ind_c * oy * ox * ky * kx +
                            ind_oy * ox * ky * kx +
                            ind_ox * ky * kx +
                            gcol_ky * kx +
                            gcol_kx];
        gx[ind_n * c * oy * ox +
           ind_c * oy * ox +
           ind_oy * ox +
           ind_ox] = top_gx;
        ''',
        'upsampling_2d_bwd')(indexes, gcol, n, c, oy, ox, ky, kx, gx)
    return gx,
def eps_greedy(self, state, exploration_rate):
    prop = np.random.uniform()
    q_max = None
    q_min = None
    if prop < exploration_rate:
        # Select a random action
        action_index = np.random.randint(0, len(config.ale_actions))
    else:
        # Select a greedy action
        state = Variable(state)
        if config.use_gpu:
            state.to_gpu()
        q = self.compute_q_variable(state, test=True)
        if config.use_gpu:
            action_index = cuda.to_cpu(cuda.cupy.argmax(q.data))
            q_max = cuda.to_cpu(cuda.cupy.max(q.data))
            q_min = cuda.to_cpu(cuda.cupy.min(q.data))
        else:
            action_index = np.argmax(q.data)
            q_max = np.max(q.data)
            q_min = np.min(q.data)
    action = self.get_action_with_index(action_index)
    # No-op
    self.no_op_count = self.no_op_count + 1 if action == 0 else 0
    if self.no_op_count > config.rl_no_op_max:
        no_op_index = np.argmin(np.asarray(config.ale_actions))
        actions_without_no_op = []
        for i in range(len(config.ale_actions)):
            if i == no_op_index:
                continue
            actions_without_no_op.append(config.ale_actions[i])
        action_index = np.random.randint(0, len(actions_without_no_op))
        action = actions_without_no_op[action_index]
        print "Reached no_op_max.", "New action:", action
    return action, q_max, q_min
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    if self.reduce == 'mean':
        coeff = gloss * self._coeff
    else:
        coeff = gloss[:, None, ...]
    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1],
                n_unit, self.ignore_label)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
            'S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_weight_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit, self.ignore_label)
    return gx, None
def compute_accuracy_batch(model, batch):
    source, target = make_source_target_pair(batch)
    if model.xp is cuda.cupy:
        source = cuda.to_gpu(source)
        target = cuda.to_gpu(target)
    model.reset_state()
    Y = model(source)
    return float(F.accuracy(Y, target, ignore_label=ID_PAD).data)
def forward_one_step(self, state, action, reward, next_state, episode_ends, test=False):
    # `episode_ends` (per-sample terminal flags) is assumed to be passed in;
    # it is required by the loop below.
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    state = Variable(state)
    next_state = Variable(next_state)
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()
    q = self.compute_q_variable(state, test=test)
    max_target_q = self.compute_target_q_variable(next_state, test=test)
    max_target_q = xp.amax(max_target_q.data, axis=1)
    target = q.data.copy()
    for i in xrange(n_batch):
        if episode_ends[i]:
            target_value = np.sign(reward[i])
        else:
            target_value = np.sign(reward[i]) + config.rl_discount_factor * max_target_q[i]
        action_index = self.get_index_with_action(action[i])
        old_value = target[i, action_index]
        diff = target_value - old_value
        # Clamp the target so the effective TD error stays within [-1, 1]
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value
    target = Variable(target)
    loss = F.mean_squared_error(target, q)
    return loss, q
def forward_one_step(self, state, action, reward, next_state, test=False):
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    state = Variable(state.reshape((n_batch, config.rl_history_length * 34)))
    next_state = Variable(next_state.reshape((n_batch, config.rl_history_length * 34)))
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()
    q = self.compute_q_variable(state, test=test)
    # Double DQN: pick the greedy action with the online network, then
    # evaluate it with the target network.
    q_ = self.compute_q_variable(next_state, test=test)
    max_action_indices = xp.argmax(q_.data, axis=1)
    if config.use_gpu:
        max_action_indices = cuda.to_cpu(max_action_indices)
    target_q = self.compute_target_q_variable(next_state, test=test)
    target = q.data.copy()
    for i in xrange(n_batch):
        max_action_index = max_action_indices[i]
        target_value = reward[i] + config.rl_discount_factor * target_q.data[i][max_action_indices[i]]
        action_index = self.get_index_for_action(action[i])
        old_value = target[i, action_index]
        diff = target_value - old_value
        # Clamp the target so the effective TD error stays within [-1, 1]
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value
    target = Variable(target)
    loss = F.mean_squared_error(target, q)
    return loss, q
@property
def xp(self):
    """Array module for this link.

    Depending on which of CPU/GPU this link is on, this property returns
    :mod:`numpy` or :mod:`cupy`.

    """
    return numpy if self._cpu else cuda.cupy
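
# --- Usage sketch (not from the original source): the `xp` property lets the
# same code run on CPU and GPU, illustrated here with a chainer.links.Linear.
import numpy
import chainer.links as L
from chainer import cuda

link = L.Linear(3, 2)
print(link.xp is numpy)            # True while the link lives on the CPU
if cuda.available:
    link.to_gpu()
    print(link.xp is cuda.cupy)    # True once it has been moved to the GPU
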
def prepare(self):
    """Prepares for an update.

    This method initializes missing optimizer states (e.g. for newly added
    parameters after the set up), and copies arrays in each state
    dictionary to CPU or GPU according to the corresponding parameter
    array.

    """
    states = self._states
    for name, param in self.target.namedparams():
        if name not in states:
            state = {}
            self.init_state(param, state)
            states[name] = state
        else:
            state = states[name]
            with cuda.get_device(param.data) as dev:
                if int(dev) == -1:  # cpu
                    for key, value in six.iteritems(state):
                        if isinstance(value, cuda.ndarray):
                            state[key] = value.get()
                else:  # gpu
                    cupy = cuda.cupy
                    for key, value in six.iteritems(state):
                        if isinstance(value, numpy.ndarray):
                            state[key] = cuda.to_gpu(value)
                        elif (isinstance(value, cupy.ndarray) and
                              value.device != dev):
                            state[key] = cupy.copy(value)
def init_state_gpu(self, param, state):
    """Initializes the optimizer state on GPU.

    This method is called from :meth:`init_state` by default.

    Args:
        param (~chainer.Variable): Parameter variable. Its data array is
            of type :class:`cupy.ndarray`.
        state (dict): State dictionary.

    .. seealso:: :meth:`init_state`

    """
    pass
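
# --- Sketch (not from the original source): how a custom optimizer might
# override the CPU/GPU state hooks above. `MomentumLikeSGD` is a hypothetical
# name, and the update rule (update_one_cpu / update_one_gpu) is omitted here.
import numpy
from chainer import cuda, optimizer

class MomentumLikeSGD(optimizer.GradientMethod):

    def init_state_cpu(self, param, state):
        # Velocity buffer allocated with NumPy for CPU parameters
        state['v'] = numpy.zeros_like(param.data)

    def init_state_gpu(self, param, state):
        # Velocity buffer allocated with CuPy on the parameter's device
        with cuda.get_device(param.data):
            state['v'] = cuda.cupy.zeros_like(param.data)
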
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    mean, ln_var = inputs
    if self.eps is None:
        self.eps = cupy.random.standard_normal(
            ln_var.shape, dtype=mean.dtype)
    self.noise = cuda.cupy.empty_like(mean)
    self.noise = cuda.elementwise(
        'T v, T e', 'T noise',
        'noise = exp(v / 2) * e',
        'gaussian_forward'
    )(ln_var, self.eps)
    return mean + self.noise,
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit, raw T weights',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = ((t == -1) || (c != t)) ? 0 : ((weights[t] * coeff[0]) / max(y, 1e-5));
        ''',
        'crossent_bwd')(
            self.y, cupy.expand_dims(t, 1), -coeff, x.shape[1], n_unit,
            self.weights.reduced_view())
    return gx, None
def debug_print(self):
    """Display a summary of the stored data and location of the Variable"""
    msg = """{summary}
- device: {device}
- volatile: {volatile}
- backend: {background}
- shape: {shape}
- dtype: {dtype}
- statistics: {stats}
- grad: {grad}"""
    stats_msg = 'mean={0:.8f}, std={1:.8f}'

    try:
        device = self.data.device
    except AttributeError:
        device = 'CPU'

    with cuda.get_device(self.data) as dev:
        xp = numpy if int(dev) == -1 else cuda.cupy

        if self.grad is None:
            grad = None
        elif xp.all(self.grad == 0):
            grad = 0
        else:
            grad = stats_msg.format(float(xp.mean(self.grad)),
                                    float(xp.std(self.grad)))

        stats = stats_msg.format(float(xp.mean(self.data)),
                                 float(xp.std(self.data)))

    return msg.format(summary=repr(self), volatile=self.volatile,
                      grad=grad, shape=self.data.shape,
                      background=type(self.data),
                      dtype=self.data.dtype, device=device,
                      stats=stats)
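
# --- Usage sketch (not from the original source): calling debug_print() on a
# plain chainer.Variable prints the summary fields formatted above.
import numpy as np
from chainer import Variable

v = Variable(np.arange(6, dtype=np.float32).reshape(2, 3))
print(v.debug_print())   # shows device, backend, shape, dtype and statistics
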