def backward(self, inputs, grad_output):
    xp = cuda.get_array_module(inputs[0])
    batch_size = len(inputs[2])
    # log p(l|x): total log-probability of the target label sequence
    total_probability = _logsumexp(self.prob_trans[0], xp, axis=1)
    # log of the summed path probabilities, grouped by emitted symbol per time step
    label_prob = _compute_label_probability(
        self.yseq.shape[2], self.path, self.path_length,
        self.prob_trans, xp, self.zero_padding)
    # CTC gradient w.r.t. the softmax inputs: y - exp(label_prob - log p(l|x))
    self.yseq -= xp.exp(label_prob - total_probability[:, None])
    if self.reduce == 'mean':
        self.yseq *= grad_output[0] / batch_size
    else:
        self.yseq *= grad_output[0][..., None]
    # mask: zero the gradient for time steps beyond each sample's input_length
    self.yseq *= (
        xp.arange(len(self.yseq))[:, None] < self.input_length)[..., None]
    return (None, None, None, None) + tuple([y for y in self.yseq])
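The subtraction line is worth unpacking: in log space, `label_prob - total_probability[:, None]` reads like the standard CTC gradient from Graves et al. For output symbol k at time t,

dL/da_t^k = y_t^k - (1/p(l|x)) * sum over {s : path(s) = k} of alpha_t(s) * beta_t(s),

where the sum runs over positions of the extended label sequence that emit k. Since `self.yseq` caches the softmax outputs y_t^k from the forward pass, subtracting `exp(label_prob - total_probability)` turns it in place into the gradient with respect to the pre-softmax activations.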
So where do the gradients for xs go? The leading Nones are the gradients for the inputs that receive none (the length and label arrays); each remaining slice of self.yseq is the gradient flowing back to the corresponding per-time-step input in xs.
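For reference, `_logsumexp` here is the usual numerically stable log-sum-exp reduction. A minimal sketch that matches the call site above (the exact helper is a library internal, so treat this as an assumption; a plain NumPy module works as `xp`):

def _logsumexp(a, xp, axis=None):
    # log(sum(exp(a))) computed as max(a) + log(sum(exp(a - max(a))))
    # so exponentiating large-magnitude log-probabilities cannot overflow
    vmax = xp.amax(a, axis=axis, keepdims=True)
    res = vmax + xp.log(xp.sum(xp.exp(a - vmax), axis=axis, keepdims=True))
    return xp.squeeze(res, axis=axis)

For example, with xp = numpy, `_logsumexp(numpy.log([[0.2, 0.3]]), numpy, axis=1)` returns log(0.5); this is how `total_probability` collapses `prob_trans[0]` over path positions into log p(l|x) for each batch element.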