def forward_gpu(self, inputs):
    """Compute the summed binary hierarchical softmax loss on the GPU.

    Args:
        inputs: Tuple ``(x, t, W)``. ``x`` is indexed as ``x[row, feature]``
            with ``x.shape[0]`` rows and ``x.shape[1]`` features, ``t`` holds
            one int32 index into ``self.begins`` per row, and ``W`` is
            indexed as ``W[node, feature]``.

    Returns:
        A 1-tuple holding the sum of the per-node losses
        ``log(1 + exp(-wxy))`` over every row and every node on its path.

    Side effects:
        Stores ``self.max_length`` and ``self.wxy`` on the instance
        (presumably consumed by the backward pass -- confirm against the
        caller). Entries of ``self.wxy`` that fall in the padding of a row
        (``offset >= length``) are never written and stay uninitialized.
    """
    x, t, W = inputs

    # Longest path among the targets in this batch; every row is padded to
    # this length so one flat kernel launch can cover the whole batch.
    max_length = cuda.reduce(
        'T t, raw T begins', 'T out', 'begins[t + 1] - begins[t]',
        'max(a, b)', 'out = a', '0',
        'binary_hierarchical_softmax_max_length')(t, self.begins)
    # The reduction yields a 0-dim device array; pull the scalar to the host.
    max_length = cuda.to_cpu(max_length)[()]

    length = max_length * x.shape[0]
    # ``ls`` and ``wxy`` share the kernel type placeholder ``T`` with ``x``,
    # so they must be allocated with the dtype of ``x`` (a hard-coded
    # float32 only works when ``x`` itself is float32).
    ls = cuda.cupy.empty((length,), dtype=x.dtype)
    n_in = x.shape[1]
    wxy = cuda.cupy.empty_like(ls)

    # One thread per (row, path offset) pair. The loss is evaluated as
    # max(-v, 0) + log1p(exp(-|v|)), which equals log(1 + exp(-v)) but does
    # not overflow for strongly negative v nor lose precision for large v.
    cuda.elementwise(
        '''raw T x, raw T w, raw int32 ts, raw int32 paths,
        raw T codes, raw int32 begins, int32 c, int32 max_length''',
        'T ls, T wxy',
        '''
        int ind = i / max_length;
        int offset = i - ind * max_length;

        int t = ts[ind];
        int begin = begins[t];
        int length = begins[t + 1] - begins[t];

        if (offset < length) {
          int p = begin + offset;
          int node = paths[p];

          T wx = 0;
          for (int j = 0; j < c; ++j) {
            int w_ind[] = {node, j};
            int x_ind[] = {ind, j};
            wx += w[w_ind] * x[x_ind];
          }
          T v = wx * codes[p];
          wxy = v;

          T abs_v = v;
          T base = 0;
          if (v < 0) {
            abs_v = -v;
            base = -v;
          }
          ls = base + log1p(exp(-abs_v));
        } else {
          ls = 0;
        }
        ''',
        'binary_hierarchical_softmax_forward'
    )(x, W, t, self.paths, self.codes, self.begins, n_in, max_length, ls,
      wxy)

    self.max_length = max_length
    self.wxy = wxy
    return ls.sum(),
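# Comment list (web-page navigation residue, not part of the code)
# Article table of contents (web-page navigation residue, not part of the code)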