def predict(self, tokens):
    """Score *tokens* with the network in evaluation mode.

    Dropout is disabled (``self.train = False``) and the raw output of the
    final linear layer is returned.

    Args:
        tokens: either a sequence of raw word strings — detected by the
            first element being a ``unicode`` string — in which case
            ``self.feature_extract`` is applied first, or an
            already-extracted sequence of ``(word_ids, char_ids, length)``
            tuples.

    Returns:
        The underlying array of the final layer's output variable,
        shape ``(batchsize, n_out)``.
    """
    self.train = False  # evaluation mode: turns off the dropout below
    # NOTE(review): `unicode` exists only on Python 2 — this line raises
    # NameError on Python 3. Confirm the target interpreter before porting.
    contexts = self.feature_extract(tokens) \
        if isinstance(tokens[0], unicode) else tokens

    # contexts: [(word_ids, char_ids, length), ...]
    ws, cs, ls = zip(*contexts)

    # Right-pad each char-id matrix to the widest one in the batch so they
    # stack into one rectangular array. -1 is presumably emb_char's
    # ignore_label — TODO confirm against the embedding's construction.
    max_cs_size = max(c.shape[1] for c in cs)
    new_cs = [np.pad(c, ((0, 0), (0, max_cs_size - c.shape[1])),
                     mode='constant', constant_values=-1)
              for c in cs]

    ws = np.asarray(ws, 'i')
    cs = np.asarray(new_cs, 'i')
    ls = np.asarray(ls, 'f')

    h_w = self.emb_word(ws)  # (batchsize, windowsize, word_dim)
    h_c = self.emb_char(cs)  # (batchsize, windowsize, max_char_len, char_dim)
    batchsize, windowsize, _, _ = h_c.data.shape

    # Mean character embedding per token: sum over the char axis, then
    # divide by each token's true (unpadded) character count.
    h_c = F.sum(h_c, 2)  # (batchsize, windowsize, char_dim)
    h_c, ls = F.broadcast(h_c, F.reshape(ls, (batchsize, windowsize, 1)))
    h_c = h_c / ls

    # Concatenate word and char representations, flatten the window,
    # then run the two-layer MLP.
    h = F.concat([h_w, h_c], 2)
    h = F.reshape(h, (batchsize, -1))
    h = F.relu(self.linear1(h))
    h = F.dropout(h, ratio=.5, train=self.train)
    ys = self.linear2(h)
    return ys.data