def predict(self, xs):
    """
    Score a batch of sentences.

    xs: list of split sentences (the batch).

    Every sentence goes through ``self.extractor.process`` (only its first
    two outputs are kept), the batch is padded with ``IGNORE`` and handed to
    ``self.forward``.  Returns ``zip(cat_ys, dep_ys)``: one pair of
    log-softmax arrays per sentence.
    """
    num_sents = len(xs)
    feats = [self.extractor.process(sent)[:2] for sent in xs]
    ws, cs = concat_examples(feats, padding=IGNORE)
    cat_ys, dep_ys = self.forward(ws, cs)
    cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))

    # Row 0 is skipped and rows 1..len(sentence) are kept -- presumably
    # position 0 is a start/root symbol and the tail is padding; TODO confirm.
    cat_out = []
    for sent, y in zip(xs, F.split_axis(cat_ys, num_sents, 0)):
        flat = F.reshape(y, (y.shape[1], -1))
        cat_out.append(F.log_softmax(flat[1:len(sent) + 1]).data)

    dep_out = []
    for sent, y in zip(xs, dep_ys):
        end = len(sent) + 1
        dep_out.append(F.log_softmax(y[1:end, :end]).data)

    assert len(cat_out) == len(dep_out)
    return zip(cat_out, dep_out)
python类log_softmax()的实例源码
def __forward(self, batch_x, batch_t, weight, train=True):
    """Run the network on one mini-batch and return ``(loss, acc)``.

    ``weight`` is reshaped to ``(-1, 1, 1)`` and broadcast to the shape of
    the network output, then multiplied by ``loss_mask(batch_t, ...)`` to
    form the loss mask.  ``self.ordinal_weight`` picks the loss: 0 uses the
    masked negative log-softmax only, 1 uses ``ordinal_loss`` only, and any
    other value blends the two.
    """
    xp = self.xp
    volatile = not train
    x = Variable(xp.asarray(batch_x), volatile=volatile)
    t = Variable(xp.asarray(batch_t), volatile=volatile)
    y = self.net(x, train=train)
    b, c, n = y.data.shape

    sample_weight = np.broadcast_to(weight.reshape(-1, 1, 1), (b, c, n))
    mask = Variable(
        xp.asarray(sample_weight * loss_mask(batch_t, self.net.rating_num)),
        volatile=volatile)

    ordinal_weight = self.ordinal_weight
    if ordinal_weight == 0:
        loss = F.sum(-F.log_softmax(y) * mask) / b
    elif ordinal_weight == 1:
        loss = ordinal_loss(y, mask)
    else:
        softmax_term = (1 - ordinal_weight) * F.sum(-F.log_softmax(y) * mask) / b
        loss = softmax_term + ordinal_weight * ordinal_loss(y, mask)

    acc = self.__accuracy(y, t)
    return loss, acc
def compute_fisher(self, dataset):
    """Average squared gradients over ``self.num_samples`` random samples.

    For each draw: pick a random item of ``dataset``, run ``self.predictor``
    on it, sample a class index from the softmax of the output, and
    back-propagate the log-softmax value of that class.  The squared
    gradient of every entry in ``self.variable_list`` (element ``[1]`` of
    each entry) is accumulated.  The per-variable averages are stored in
    ``self.fisher_list`` and returned.
    """
    accumulators = [np.zeros(entry[1].shape) for entry in self.variable_list]

    for _ in range(self.num_samples):
        sample_index = np.random.randint(len(dataset))
        x, _ = dataset[sample_index]
        y = self.predictor(np.array([x]))
        prob_list = F.softmax(y)[0].data
        class_index = np.random.choice(len(prob_list), p=prob_list)
        loss = F.log_softmax(y)[0, class_index]
        self.cleargrads()
        loss.backward()
        for i, entry in enumerate(self.variable_list):
            accumulators[i] += np.square(entry[1].grad)

    self.fisher_list = [acc / self.num_samples for acc in accumulators]
    return self.fisher_list
def all_log_prob(self):
    """Return log-probabilities, computed with back-propagation forced on.

    When ``self.min_prob`` is positive the result is ``log(self.all_prob)``;
    otherwise it is the log-softmax of ``self.beta * self.logits``.
    """
    with chainer.force_backprop_mode():
        if self.min_prob > 0:
            return F.log(self.all_prob)
        return F.log_softmax(self.beta * self.logits)
def log_probs(self):
    """Return the log-softmax of ``self.logits``."""
    logits = self.logits
    return F.log_softmax(logits)
def predict(self, xs):
    """
    Score a batch of sentences.

    xs: list of split sentences (the batch).

    Returns ``zip(cat, dep)`` with one pair per sentence: the raw category
    scores and the log-softmax of the dependency scores.  The first and last
    rows (and the last dependency column) are dropped -- presumably
    start/end symbols; TODO confirm.
    """
    processed = [self.extractor.process(sent) for sent in xs]
    ws, ss, ps = zip(*processed)
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_out = [y.data[1:-1] for y in cat_ys]
    dep_out = [F.log_softmax(y[1:-1, :-1]).data for y in dep_ys]
    return zip(cat_out, dep_out)
def predict(self, xs):
    """
    Score a batch of sentences.

    xs: list of split sentences (the batch).

    Returns ``zip(cat, dep)`` with one pair of log-softmax arrays per
    sentence.  The first and last rows (and the last dependency column) are
    dropped -- presumably start/end symbols; TODO confirm.
    """
    processed = [self.extractor.process(sent) for sent in xs]
    ws, ss, ps = zip(*processed)
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_out = [F.log_softmax(y[1:-1]).data for y in cat_ys]
    dep_out = [F.log_softmax(y[1:-1, :-1]).data for y in dep_ys]
    return zip(cat_out, dep_out)
def predict(self, xs):
    """
    Score a batch of sentences.

    xs: list of split sentences (the batch).

    Each sentence is converted by ``self.extractor.process`` into a
    ``(ws, cs, ls)`` triple; the per-field tuples are passed to
    ``self.forward``.  Returns ``zip(cat, dep)`` with one pair per sentence:
    the raw category scores and the log-softmax of the dependency scores.
    The first and last rows (and the last dependency column) are dropped --
    presumably start/end symbols; TODO confirm.
    """
    xs = [self.extractor.process(x) for x in xs]
    ws, cs, ls = zip(*xs)
    cat_ys, dep_ys = self.forward(ws, cs, ls)
    cat_out = [y.data[1:-1] for y in cat_ys]
    dep_out = [F.log_softmax(y[1:-1, :-1]).data for y in dep_ys]
    return zip(cat_out, dep_out)
def predict(self, xs):
    """
    Score a batch of sentences.

    xs: list of split sentences (the batch).

    The processed sentences are merged by ``concat_examples`` into four
    inputs for ``self.forward``.  Returns ``zip(cat, dep)`` with one pair of
    log-softmax arrays per sentence.  The first and last rows (and the last
    dependency column) are dropped -- presumably start/end symbols; TODO
    confirm.
    """
    processed = [self.extractor.process(sent) for sent in xs]
    ws, ss, ps, ls = concat_examples(processed)
    cat_ys, dep_ys = self.forward(ws, ss, ps, ls)
    cat_out = [F.log_softmax(y[1:-1]).data for y in cat_ys]
    dep_out = [F.log_softmax(y[1:-1, :-1]).data for y in dep_ys]
    return zip(cat_out, dep_out)
def _calc_top_n(self, model, x, state, beam_width):
    """Decode one step and return the ``beam_width`` best candidates.

    Returns ``(inds, scores, state, eos_score)``: candidate indices, the
    host-side log-softmax scores of the first batch row, the updated decoder
    state, and the EOS score read before ``self._edit_probs`` is applied.
    """
    logits, state = model.decode_once(x, state, train=False)
    log_probs = F.log_softmax(logits, use_cudnn=False)
    scores = chainer.cuda.to_cpu(log_probs.data[0])

    # Read the EOS score first: _edit_probs presumably rewrites `scores`
    # in place (its return value is ignored) -- TODO confirm.
    eos_score = scores[self.EOS]
    self._edit_probs(scores)

    # After partitioning at len - beam_width, the tail holds the indices of
    # the largest scores (in no guaranteed order); reverse and take them.
    partitioned = np.argpartition(scores, len(scores) - beam_width)
    inds = partitioned[::-1][:beam_width]
    return inds, scores, state, eos_score
def check_forward(self, x_data, use_cudnn=True):
    """Check ``functions.log_softmax`` against a NumPy reference.

    The reference is ``self.x - logsumexp(self.x, axis=1)``; it is built
    from ``self.x`` rather than ``x_data``.  Both the output dtype and the
    values (within ``self.check_forward_options``) are asserted.
    """
    y = functions.log_softmax(chainer.Variable(x_data), use_cudnn)
    self.assertEqual(y.data.dtype, self.dtype)

    # logaddexp folded along axis 1 is a log-sum-exp.
    log_z = numpy.logaddexp.reduce(self.x, axis=1, keepdims=True)
    y_expect = self.x - log_z

    gradient_check.assert_allclose(
        y_expect, y.data, **self.check_forward_options)
def forward(self):
    """Wrap ``self.x`` in a Variable and return its log-softmax."""
    return functions.log_softmax(
        chainer.Variable(self.x), use_cudnn=self.use_cudnn)
def __init__(self, use_cudnn=True):
    """Store the function tag ``"log_softmax"`` and the ``use_cudnn`` flag."""
    self._function, self.use_cudnn = "log_softmax", use_cudnn
def __call__(self, x):
    """Apply ``F.log_softmax`` to ``x``, forwarding the stored ``use_cudnn`` flag."""
    cudnn_flag = self.use_cudnn
    return F.log_softmax(x, cudnn_flag)
def __init__(self, use_cudnn=True):
    """Store the function tag ``"log_softmax"`` and the ``use_cudnn`` flag."""
    self._function, self.use_cudnn = "log_softmax", use_cudnn
def __call__(self, x):
    """Apply ``F.log_softmax`` to ``x``, forwarding the stored ``use_cudnn`` flag."""
    cudnn_flag = self.use_cudnn
    return F.log_softmax(x, cudnn_flag)
def __init__(self, use_cudnn=True):
    """Store the function tag ``"log_softmax"`` and the ``use_cudnn`` flag."""
    self._function, self.use_cudnn = "log_softmax", use_cudnn
def __call__(self, x):
    """Apply ``F.log_softmax`` to ``x``, forwarding the stored ``use_cudnn`` flag."""
    cudnn_flag = self.use_cudnn
    return F.log_softmax(x, cudnn_flag)
def __init__(self, use_cudnn=True):
    """Store the function tag ``"log_softmax"`` and the ``use_cudnn`` flag."""
    self._function, self.use_cudnn = "log_softmax", use_cudnn
def __call__(self, x):
    """Apply ``F.log_softmax`` to ``x``, forwarding the stored ``use_cudnn`` flag."""
    cudnn_flag = self.use_cudnn
    return F.log_softmax(x, cudnn_flag)
lstm_decoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def predict(self, s):
    """Return log-softmax scores computed from the decoder state.

    Args:
        s (any): Current (hidden, cell) states; only ``s[2][0]`` is read.

    Return:
        (~chainer.Variable) log softmax vector
    """
    feature = s[2][0]
    projected = self.proj(feature)
    return F.log_softmax(self.out(projected))
lstm_decoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 19
收藏 0
点赞 0
评论 0
def predict(self, s):
    """Return log-softmax scores computed from the decoder state.

    Args:
        s (any): Current (hidden, cell) states; only ``s[2][0]`` is read.

    Return:
        (~chainer.Variable) log softmax vector
    """
    feature = s[2][0]
    projected = self.proj(feature)
    return F.log_softmax(self.out(projected))
lstm_decoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def predict(self, s):
    """Return log-softmax scores computed from the decoder state.

    Args:
        s (any): Current (hidden, cell) states; only ``s[2][0]`` is read.

    Return:
        (~chainer.Variable) log softmax vector
    """
    feature = s[2][0]
    projected = self.proj(feature)
    return F.log_softmax(self.out(projected))
lstm_decoder.py 文件源码
项目:DSTC6-End-to-End-Conversation-Modeling
作者: dialogtekgeek
项目源码
文件源码
阅读 19
收藏 0
点赞 0
评论 0
def predict(self, s):
    """Return log-softmax scores computed from the decoder state.

    Args:
        s (any): Current (hidden, cell) states; only ``s[2][0]`` is read.

    Return:
        (~chainer.Variable) log softmax vector
    """
    feature = s[2][0]
    projected = self.proj(feature)
    return F.log_softmax(self.out(projected))
def beam_search(dec,state,y,data,beam_width,mydict_inv):
    """Beam-search decode for at most 50 steps; return the best sequence per item.

    The function is GPU-only as written: it uses ``cuda.cupy`` for device
    arrays and ``.get()`` to copy scores to the host.

    Args:
        dec: decoder, called as ``state, y = dec(token_ids, state, train=False)``.
        state: dict with ``'h1'`` and ``'c1'`` Variables (hidden / cell state).
        y: Variable of scores; its row-wise argmax is the first decoder input.
        data: only ``data.shape[0]`` is read, as the batch size.
        beam_width: number of hypotheses kept per batch item.
        mydict_inv: unused; kept so existing callers keep working.

    Returns:
        Nested list of int token ids, one list per batch item, holding
        beam 0 (the highest-scoring hypothesis) up to the last decoded step.
    """
    xp = cuda.cupy
    batchsize = data.shape[0]
    topk = 20  # candidates expanded per beam at every step
    route = np.zeros((batchsize, beam_width, 50)).astype(np.int32)
    for j in range(50):
        if j == 0:
            # First step: feed the argmax of the incoming scores and seed
            # every beam with a copy of the resulting state.
            y = Variable(xp.array(np.argmax(y.data.get(), axis=1)).astype(xp.int32))
            state, y = dec(y, state, train=False)
            h = state['h1'].data
            c = state['c1'].data
            h = xp.tile(h.reshape(batchsize, 1, -1), (1, beam_width, 1))
            c = xp.tile(c.reshape(batchsize, 1, -1), (1, beam_width, 1))
            ptr = F.log_softmax(y).data.get()
            pred_total_city = np.argsort(ptr)[:, ::-1][:, :beam_width]
            pred_total_score = np.sort(ptr)[:, ::-1][:, :beam_width]
            route[:, :, j] = pred_total_city
            pred_total_city = pred_total_city.reshape(batchsize, beam_width, 1)
        else:
            pred_next_score = np.zeros((batchsize, beam_width, topk))
            pred_next_city = np.zeros((batchsize, beam_width, topk)).astype(np.int32)
            for b in range(beam_width):
                # Advance beam b by one step from its own state and last token.
                state = {'c1': Variable(c[:, b, :]), 'h1': Variable(h[:, b, :])}
                cur_city = xp.array(pred_total_city[:, b, j - 1]).astype(xp.int32)
                state, y = dec(cur_city, state, train=False)
                h[:, b, :] = state['h1'].data
                c[:, b, :] = state['c1'].data
                ptr = F.log_softmax(y).data.get()
                pred_next_score[:, b, :] = np.sort(ptr, axis=1)[:, ::-1][:, :topk]
                pred_next_city[:, b, :] = np.argsort(ptr, axis=1)[:, ::-1][:, :topk]
            # Replicate each beam's state once per candidate so states can be
            # gathered with the same (beam, candidate) indices as the scores.
            h = F.stack([h for i in range(topk)], axis=2).data
            c = F.stack([c for i in range(topk)], axis=2).data
            pred_total_city = np.tile(route[:, :, :j], (1, 1, topk)).reshape(batchsize, beam_width, topk, j)
            pred_next_city = pred_next_city.reshape(batchsize, beam_width, topk, 1)
            pred_total_city = np.concatenate((pred_total_city, pred_next_city), axis=3)
            pred_total_score = np.tile(pred_total_score.reshape(batchsize, beam_width, 1), (1, 1, topk)).reshape(batchsize, beam_width, topk, 1)
            pred_next_score = pred_next_score.reshape(batchsize, beam_width, topk, 1)
            pred_total_score += pred_next_score
            # Best beam_width of the beam_width * topk extensions, best first.
            idx = pred_total_score.reshape(batchsize, beam_width * topk).argsort(axis=1)[:, ::-1][:, :beam_width]
            # idx // topk is the source beam, idx % topk the candidate rank.
            # The fancy index yields a (batch, batch, ...) array; the diagonal
            # pairs each batch item with its own index row.
            pred_total_city = pred_total_city[:, idx // topk, np.mod(idx, topk), :][np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, j + 1)
            pred_total_score = pred_total_score[:, idx // topk, np.mod(idx, topk), :][np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, 1)
            h = h[:, idx // topk, np.mod(idx, topk), :][np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, -1)
            c = c[:, idx // topk, np.mod(idx, topk), :][np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, -1)
            route[:, :, :j + 1] = pred_total_city
            # Stop once every beam of every item has just emitted token 15
            # (presumably the end-of-sequence id -- TODO confirm).
            # NOTE(review): the original indentation was lost; this check is
            # assumed to belong to the `else` branch -- confirm upstream.
            if (pred_total_city[:, :, j] == 15).all():
                break
    return route[:, 0, :j + 1].tolist()