def term_slop(self, loc, val, bs, nf, train=True):
    """Compute the per-feature slope term and its KL penalty.

    Slopes follow a hierarchical prior: a shared group mean / log-variance
    (``slop_mu`` / ``slop_lv``) plus per-feature deltas
    (``slop_delta_mu`` / ``slop_delta_lv``) that are shrunk toward zero,
    so an individual feature falls back on the group mean unless the data
    pulls it away.
    """
    shape = (bs, nf)
    # Broadcast the scalar group prior over the (batch, feature) grid.
    group_mu = F.broadcast_to(self.slop_mu.b, shape)
    group_lv = F.broadcast_to(self.slop_lv.b, shape)
    # At inference time add a very negative log-variance floor so the
    # reparameterized draw below collapses to the mean.
    if not train:
        group_lv += self.lv_floor
    # Per-feature posterior = group prior + learned per-feature delta.
    mu = F.reshape(self.slop_delta_mu(loc), shape) + group_mu
    lv = F.reshape(self.slop_delta_lv(loc), shape) + group_lv
    # Sample coefficients via the reparameterization trick, then reduce
    # over the feature axis against the feature values.
    sampled = F.gaussian(mu, lv)
    slop = F.sum(sampled * val, axis=1)
    # KL of the group-level parameters against N(0, 1) ...
    kld_group = F.gaussian_kl_divergence(self.slop_mu.b, self.slop_lv.b)
    # ... plus KL of every per-feature delta against N(0, 1), which
    # implements the shrinkage toward the group mean.
    kld_delta = F.gaussian_kl_divergence(self.slop_delta_mu.W,
                                         self.slop_delta_lv.W)
    return slop, kld_group + kld_delta
# NOTE(review): removed stray web-scrape residue that broke Python syntax
# (two navigation captions: "评论列表" = comment list, "文章目录" = article TOC).