def term_bias(self, bs, train=True):
    """ Compute overall bias and broadcast to shape of batchsize
    """
    shape = (bs, 1,)
    # Bias mean and log variance, broadcast across the batch
    bs_mu = F.broadcast_to(self.bias_mu.b, shape)
    bs_lv = F.broadcast_to(self.bias_lv.b, shape)
    # At validation time, add a very negative log variance before sampling
    # so we draw from a very narrow distribution about the mean,
    # i.e. effectively just guess the mean.
    if not train:
        bs_lv += self.lv_floor
    # Bias is drawn from a Gaussian with the given mean and log variance
    bias = F.flatten(F.gaussian(bs_mu, bs_lv))
    # Prior on the bias: KL(N(mu_bias, var_bias) || N(0, 1))
    kld = F.gaussian_kl_divergence(self.bias_mu.b, self.bias_lv.b)
    return bias, kld
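For reference, here is a minimal standalone sketch (not part of the original class; the shapes and values are made up) showing how F.broadcast_to, F.gaussian, F.flatten and F.gaussian_kl_divergence combine to produce the sampled bias and its KL term:

import numpy as np
import chainer.functions as F

bs = 4
mu = np.zeros((1, 1), dtype=np.float32)        # bias mean
lv = np.full((1, 1), -2.0, dtype=np.float32)   # bias log variance

bs_mu = F.broadcast_to(mu, (bs, 1))
bs_lv = F.broadcast_to(lv, (bs, 1))
bias = F.flatten(F.gaussian(bs_mu, bs_lv))     # shape (bs,): one reparameterized sample per row
kld = F.gaussian_kl_divergence(mu, lv)         # scalar KL(N(mu, exp(lv)) || N(0, 1))
print(bias.shape, float(kld.data))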
def _elementwise_softmax_cross_entropy(x, t):
    assert x.shape[:-1] == t.shape
    shape = t.shape
    x = F.reshape(x, (-1, x.shape[-1]))
    t = F.flatten(t)
    return F.reshape(
        F.softmax_cross_entropy(x, t, reduce='no'), shape)
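A quick usage sketch for the helper above, with assumed toy shapes (logits over 7 classes for a 2×5 grid of labels); it returns one unreduced loss per position:

import numpy as np

batch, length, n_classes = 2, 5, 7
x = np.random.randn(batch, length, n_classes).astype(np.float32)    # logits
t = np.random.randint(0, n_classes, size=(batch, length)).astype(np.int32)

loss = _elementwise_softmax_cross_entropy(x, t)
print(loss.shape)   # (2, 5): one cross-entropy value per position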
def image_to_feature(self, image_np):
    """
    Convert an RGB image given as a (3, 100, 100) numpy.array
    into a feature vector and return it.
    """
    _train = chainer.config.train
    chainer.config.train = False
    x = chainer.Variable(numpy.array([image_np], dtype=numpy.float32))
    feature_vector = F.flatten(self.fe.reduct(x)).data
    chainer.config.train = _train
    return feature_vector
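Illustrative only: self.fe.reduct is assumed to return a convolutional feature map, and F.flatten simply collapses it into a 1-D vector, e.g.:

import numpy as np
import chainer.functions as F

fmap = np.random.randn(1, 64, 6, 6).astype(np.float32)   # stand-in for self.fe.reduct(x)
vec = F.flatten(fmap)                                     # 1-D vector of length 1 * 64 * 6 * 6
print(vec.shape)   # (2304,)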
def forward(self, ws, ss, ps, ls, dep_ts=None):
    batchsize, slen = ws.shape
    xp = chainer.cuda.get_array_module(ws[0])
    wss = self.emb_word(ws)
    sss = F.reshape(self.emb_suf(ss), (batchsize, slen, 4 * self.afix_dim))
    pss = F.reshape(self.emb_prf(ps), (batchsize, slen, 4 * self.afix_dim))
    ins = F.dropout(F.concat([wss, sss, pss], 2), self.dropout_ratio, train=self.train)
    xs_f = F.transpose(ins, (1, 0, 2))
    xs_b = xs_f[::-1]
    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    # (batch, length, hidden_dim)
    hs = F.transpose(F.concat([hs_f, hs_b[::-1]], 2), (1, 0, 2))
    dep_ys = self.biaffine_arc(
        F.elu(F.dropout(self.arc_dep(hs), 0.32, train=self.train)),
        F.elu(F.dropout(self.arc_head(hs), 0.32, train=self.train)))
    # Use gold heads about half the time when they are available,
    # otherwise fall back to the predicted heads.
    if dep_ts is not None and random.random() >= 0.5:
        heads = dep_ts
    else:
        heads = F.flatten(F.argmax(dep_ys, axis=2)) + \
            xp.repeat(xp.arange(0, batchsize * slen, slen), slen)
    hs = F.reshape(hs, (batchsize * slen, -1))
    heads = F.permutate(
        F.elu(F.dropout(
            self.rel_head(hs), 0.32, train=self.train)), heads)
    childs = F.elu(F.dropout(self.rel_dep(hs), 0.32, train=self.train))
    cat_ys = self.biaffine_tag(childs, heads)
    dep_ys = F.split_axis(dep_ys, batchsize, 0) if batchsize > 1 else [dep_ys]
    dep_ys = [F.reshape(v, v.shape[1:])[:l, :l] for v, l in zip(dep_ys, ls)]
    cat_ys = F.split_axis(cat_ys, batchsize, 0) if batchsize > 1 else [cat_ys]
    cat_ys = [v[:l] for v, l in zip(cat_ys, ls)]
    return cat_ys, dep_ys
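A sketch (with assumed toy shapes) of the head-selection step above: F.flatten(F.argmax(...)) gives per-token head positions, and the repeated offsets shift each sentence into its own block so the result can index the (batchsize * slen, dim) matrix passed to F.permutate:

import numpy as np
import chainer.functions as F

batch, slen = 2, 3
dep_ys = np.random.randn(batch, slen, slen).astype(np.float32)   # arc scores per (sentence, token, head)
heads = F.flatten(F.argmax(dep_ys, axis=2))                      # (batch * slen,) local head positions
offsets = np.repeat(np.arange(0, batch * slen, slen), slen).astype(np.int32)  # [0, 0, 0, 3, 3, 3]
flat_heads = heads + offsets                                     # global row indices into (batch*slen, dim)
print(flat_heads.shape)   # (6,)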