def squared_distance_matrix(X):
    n = X.shape[0]
    XX = F.sum(X ** 2.0, axis=1)
    distances = -2.0 * F.linear(X, X)
    distances = distances + F.broadcast_to(XX, (n, n))
    distances = distances + F.broadcast_to(F.expand_dims(XX, 1), (n, n))
    return distances
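A minimal usage sketch (not part of the original source) that checks the helper above against a naive pairwise computation; it assumes NumPy is installed and that chainer.functions is imported as F, as the snippet implies:

import numpy as np

X = np.random.randn(4, 3).astype(np.float32)
D = squared_distance_matrix(X)  # chainer.Variable of shape (4, 4)
# naive reference: ||x_i - x_j||^2 for every pair
ref = ((X[:, None, :] - X[None, :, :]) ** 2).sum(axis=2)
print(np.allclose(D.data, ref, atol=1e-5))  # expected: True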
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.ndarray):
            Anchor vectors. Each vector in f must be l2-normalized.
        f_p (chainer.Variable or xp.ndarray):
            Positive vectors. Each vector in f_p must be l2-normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
    # term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))
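An illustrative call (not from the original source); the imports are assumptions about what the snippet above expects to already be in scope, since the page omits them:

import numpy as np
import chainer.functions as F
from chainer import cuda
from chainer.functions import matmul, transpose

f = np.random.randn(8, 16).astype(np.float32)
f_p = np.random.randn(8, 16).astype(np.float32)
f /= np.linalg.norm(f, axis=1, keepdims=True)      # anchors: l2-normalized
f_p /= np.linalg.norm(f_p, axis=1, keepdims=True)  # positives: l2-normalized
loss = angular_mc_loss(f, f_p, alpha=45)
print(float(loss.data))                            # scalar batch loss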
def __call__(self, x, context):
    e = self.embed(context)
    shape = e.shape
    x = F.broadcast_to(x[:, None], (shape[0], shape[1]))
    e = F.reshape(e, (shape[0] * shape[1], shape[2]))
    x = F.reshape(x, (shape[0] * shape[1],))
    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, x, context):
    x = F.broadcast_to(x[:, None], (context.shape[0], context.shape[1]))
    x = F.reshape(x, (context.shape[0] * context.shape[1],))
    context = context.reshape((context.shape[0] * context.shape[1]))
    e = self.rnn.charRNN(context)
    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, x):
    return functions.broadcast_to(x, self.shape)
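For reference, a minimal standalone illustration of F.broadcast_to itself (the array and target shape here are made up; assumes numpy and chainer.functions as F):

import numpy as np
import chainer.functions as F

x = np.arange(3, dtype=np.float32)  # shape (3,)
y = F.broadcast_to(x, (4, 3))       # row repeated 4 times
print(y.shape)                      # (4, 3)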
def __call__(self, x):
    """Normalize input and scale it.

    Args:
        x (chainer.Variable): A variable holding a 4-dimensional array.
            Its :obj:`dtype` is :obj:`numpy.float32`.

    Returns:
        chainer.Variable:
            The shape and :obj:`dtype` are the same as those of the input.
    """
    x = F.normalize(x, eps=self.eps, axis=1)
    scale = F.broadcast_to(self.scale[:, np.newaxis, np.newaxis], x.shape)
    return x * scale
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    h = F.broadcast_to(F.expand_dims(h, axis=2), (batch_size, hidden_size, src_len))
    h = F.swapaxes(h, 1, 2)
    S = F.reshape(F.concat((S, h), axis=2), (batch_size * src_len, 2 * hidden_size))
    a = F.softmax(F.reshape(self.second_layer(F.tanh(self.first_layer(S))), (batch_size, src_len)))
    return a
def term_slop(self, loc, val, bs, nf, train=True):
    """ Compute the slope for each active feature.
    """
    shape = (bs, nf)

    # Reshape all of our constants
    pr_mu = F.broadcast_to(self.slop_mu.b, shape)
    pr_lv = F.broadcast_to(self.slop_lv.b, shape)
    # This is either zero or a very negative number, indicating whether to
    # sample from N(mean, logvar) or to just draw the mean precisely
    if not train:
        pr_lv += self.lv_floor

    # The feature slopes are grouped together so that they all share a
    # common mean. The individual slop_delta_lv values are then shrunk
    # towards zero, which effectively makes features fall back on the
    # group mean.
    sl_mu = F.reshape(self.slop_delta_mu(loc), shape) + pr_mu
    sl_lv = F.reshape(self.slop_delta_lv(loc), shape) + pr_lv
    coef = F.gaussian(sl_mu, sl_lv)
    slop = F.sum(coef * val, axis=1)

    # Calculate divergence between group mean and N(0, 1)
    kld1 = F.gaussian_kl_divergence(self.slop_mu.b, self.slop_lv.b)
    # Calculate divergence of individual delta means and delta vars
    args = (self.slop_delta_mu.W, self.slop_delta_lv.W)
    kld2 = F.gaussian_kl_divergence(*args)
    return slop, kld1 + kld2
def kl_div(mu1, lv1, lv2):
    # KL divergence between the given normal and a zero-mean prior N(0, s2^2),
    # where lv* are log standard deviations:
    # ln(s2) - ln(s1) + (s1^2 + (u1 - u2)^2) / (2 * s2^2) - 0.5
    if len(lv1.shape) == 2:
        lv1 = F.expand_dims(lv1, 0)
        mu1 = F.expand_dims(mu1, 0)
    lv2 = F.broadcast_to(lv2, lv1.shape)
    v12 = F.exp(lv1) ** 2.0
    v22 = F.exp(lv2) ** 2.0
    return lv2 - lv1 + .5 * v12 / v22 + .5 * mu1 ** 2. / v22 - .5
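A quick consistency check (illustrative, not from the original source): with the prior log-std set to zero and lv interpreted as a log standard deviation, summing kl_div should match chainer's gaussian_kl_divergence called with the log-variance 2 * lv; assumes numpy and chainer.functions as F:

import numpy as np
import chainer.functions as F

mu = np.random.randn(5, 3).astype(np.float32)
lv = (0.1 * np.random.randn(5, 3)).astype(np.float32)  # log standard deviations
lv_prior = np.zeros((), dtype=np.float32)               # log-std of a N(0, 1) prior

ours = F.sum(kl_div(mu, lv, lv_prior))
ref = F.gaussian_kl_divergence(mu, 2.0 * lv)            # expects log-variance
print(np.allclose(ours.data, ref.data, atol=1e-4))      # expected: True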
def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
    # Change all of the shapes to form interaction vectors
    shape = (bs, nf * 2, self.n_dim)
    feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
    feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
    if not train:
        feat_lv_vec += self.lv_floor

    # Construct the interaction mean and variance
    # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
    # dot(feat, feat) is (bs, nf)
    ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                      feat_lv_vec + self.feat_delta_lv(iloc))
    jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                      feat_lv_vec + self.feat_delta_lv(jloc))
    # feat is (bs, )
    feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)

    # Compute the KLD for the group mean vector and variance vector
    # KL(N(group mu, group lv) || N(0, hyper_lv))
    # hyper_lv ~ gamma(1, 1)
    kldg = F.sum(kl_div(self.feat_mu_vec.b, self.feat_lv_vec.b,
                        self.hyper_feat_lv_vec.b))
    # Compute deviations from hyperprior
    # KL(N(delta_i, delta_i lv) || N(0, hyper_delta_lv))
    # hyper_delta_lv ~ gamma(1, 1)
    kldi = F.sum(kl_div(self.feat_delta_mu.W, self.feat_delta_lv.W,
                        self.hyper_feat_delta_lv.b))
    # Hyperprior penalty for log(var) ~ Gamma(alpha=1, beta=1)
    # Gamma(log(var) | alpha=1, beta=1) = -log(var)
    # The loss function will attempt to make log(var) as negative as
    # possible, which will in turn make the variance as small as possible
    # The sum just casts a 1D vector to a scalar
    hyperg = -F.sum(self.hyper_feat_lv_vec.b)
    hyperi = -F.sum(self.hyper_feat_delta_lv.b)
    return feat, kldg, kldi, hyperg, hyperi
# updater.py, from the project Semantic-Segmentation-using-Adversarial-Networks (author: oyam)
def _make_dis_input(self, input_img, label_map):
    b = F.broadcast_to(input_img[:, 0, :, :], shape=label_map.shape)
    g = F.broadcast_to(input_img[:, 1, :, :], shape=label_map.shape)
    r = F.broadcast_to(input_img[:, 2, :, :], shape=label_map.shape)
    product_b = label_map * b
    product_g = label_map * g
    product_r = label_map * r
    dis_input = F.concat([product_b, product_g, product_r], axis=1)
    return dis_input
def free_energy(self, v):
    """
    :param v: Variable of shape (batch_size, in_channels, image_height, image_width), the input (training) data
    :return: scalar
    """
    batch_size = v.data.shape[0]
    in_channels = self.in_channels
    real = self.real
    if real == 0:
        '''
        visible layer is 0, 1 (bit)
        vbias_term = 1 * SUM(a(i) * v(i))
        '''
        v_sum = F.sum(v, axis=(2, 3))  # sum over image_height & image_width
        # Originally this should return the sum for each batch element,
        # but it returns a scalar (the sum over batches), since everything
        # is summed at the end anyway.
        vbias_term = F.sum(F.matmul(v_sum, self.conv.a))
        wx_b = self.conv(v)
    else:
        '''
        visible layer takes real values
        vbias_term = 0.5 * SUM((v(i) - a(i)) * (v(i) - a(i)))
        '''
        # TODO: check
        # m = Variable(xp.ones((batch_size, 1), dtype=xp.float32))
        n = F.reshape(self.conv.a, (1, in_channels, 1, 1))
        xp = cuda.get_array_module(n.data)
        std_ch = xp.reshape(self.std, (1, in_channels, 1, 1))
        # v_ = v - F.matmul(m, n)
        v_ = (v - F.broadcast_to(n, v.data.shape)) / std_ch
        vbias_term = F.sum(0.5 * v_ * v_)
        wx_b = self.conv(v / std_ch)

    hidden_term = F.sum(F.log(1 + F.exp(wx_b)))
    # print('vbias = ', vbias_term.data, ', hidden = ', hidden_term.data, 'F.exp(wx_b) = ', F.exp(wx_b).data)
    return - vbias_term - hidden_term
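As a small aside (an illustrative check, not from the original source): the hidden term above, F.log(1 + F.exp(wx_b)), is the softplus function, so it can be verified against chainer's F.softplus; assumes numpy and chainer.functions as F:

import numpy as np
import chainer.functions as F

wx_b = np.random.randn(2, 3).astype(np.float32)
a = F.log(1 + F.exp(wx_b))
b = F.softplus(wx_b)
print(np.allclose(a.data, b.data, atol=1e-5))  # expected: True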
def maximum_entropy_mellowmax(values, omega=1., beta_min=-10, beta_max=10):
    """Maximum entropy mellowmax policy function.

    This function provides a categorical distribution whose expectation
    matches that of the mellowmax function while maximizing its entropy.

    See: http://arxiv.org/abs/1612.05628

    Args:
        values (Variable or ndarray):
            Input values. Mellowmax is taken along the second axis.
        omega (float):
            Parameter of mellowmax.
        beta_min (float):
            Minimum value of beta, used in Brent's algorithm.
        beta_max (float):
            Maximum value of beta, used in Brent's algorithm.

    Returns:
        outputs (Variable)
    """
    xp = chainer.cuda.get_array_module(values)
    mm = mellowmax(values, axis=1)
    # Advantage: Q - mellowmax(Q)
    batch_adv = values - F.broadcast_to(F.expand_dims(mm, 1), values.shape)
    # Move data to CPU because we use Brent's algorithm in scipy
    batch_adv = chainer.cuda.to_cpu(batch_adv.data)
    batch_beta = np.empty(mm.shape, dtype=np.float32)

    # Beta is computed as the root of this function
    def f(y, adv):
        return np.sum(np.exp(y * adv) * adv)

    for idx in np.ndindex(mm.shape):
        idx_full = idx[:1] + (slice(None),) + idx[1:]
        adv = batch_adv[idx_full]
        try:
            beta = scipy.optimize.brentq(
                f, a=beta_min, b=beta_max, args=(adv,))
        except ValueError:
            beta = 0
        batch_beta[idx] = beta

    return F.softmax(xp.expand_dims(xp.asarray(batch_beta), 1) * values)
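The per-state root finding above can be reproduced in isolation; a minimal sketch with made-up advantage values (assumes only numpy and scipy):

import numpy as np
import scipy.optimize

adv = np.array([0.5, -0.2, -0.3], dtype=np.float32)  # hypothetical advantages Q - mellowmax(Q) for one state

def f(b):
    return np.sum(np.exp(b * adv) * adv)  # beta is the root of this function

beta = scipy.optimize.brentq(f, a=-10, b=10)
weights = np.exp(beta * adv)
print(beta, weights / weights.sum())  # maximum-entropy weights for this state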
def __init__(self, n_input_channels, action_size, var,
             n_hidden_layers=0, n_hidden_channels=None,
             min_action=None, max_action=None, bound_mean=False,
             nonlinearity=F.relu, mean_wscale=1):
    self.n_input_channels = n_input_channels
    self.action_size = action_size
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.min_action = min_action
    self.max_action = max_action
    self.bound_mean = bound_mean
    self.nonlinearity = nonlinearity
    if np.isscalar(var):
        self.var = np.full(action_size, var, dtype=np.float32)
    else:
        self.var = var
    layers = []
    if n_hidden_layers > 0:
        # Input to hidden
        layers.append(L.Linear(n_input_channels, n_hidden_channels))
        layers.append(self.nonlinearity)
        for _ in range(n_hidden_layers - 1):
            # Hidden to hidden
            layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
        # The last layer is used to compute the mean
        layers.append(
            L.Linear(n_hidden_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    else:
        # There's only one layer for computing the mean
        layers.append(
            L.Linear(n_input_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    if self.bound_mean:
        layers.append(lambda x: bound_by_tanh(
            x, self.min_action, self.max_action))

    def get_var_array(shape):
        self.var = self.xp.asarray(self.var)
        return self.xp.broadcast_to(self.var, shape)

    layers.append(lambda x: distribution.GaussianDistribution(
        x, get_var_array(x.shape)))
    super().__init__(*layers)
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[:, :, :-pad]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    self.contexts = []
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if t == 0:
            ct = (1 - f) * z
            self.contexts.append(ct)
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if t == 0:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if self.contexts is None:
            ct = (1 - f) * z
            self.contexts = [ct]
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t - T]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def calcAttention(self, h1, hList, aList, encLen, cMBSize, args):
    # If attention is disabled, return h1 unchanged
    if self.attn_mode == 0:
        return h1
    # 1, prepare the decoder-side query for the attention computation
    target1 = self.model.attnIn_L1(h1)  # linear transform of the input
    # (cMBSize, self.hDim) => (cMBSize, 1, self.hDim)
    target2 = chaFunc.expand_dims(target1, axis=1)
    # (cMBSize, 1, self.hDim) => (cMBSize, encLen, self.hDim)
    target3 = chaFunc.broadcast_to(target2, (cMBSize, encLen, self.hDim))
    # target3 = chaFunc.broadcast_to(chaFunc.reshape(
    #     target1, (cMBSize, 1, self.hDim)), (cMBSize, encLen, self.hDim))
    # 2, compute the attention scores
    if self.attn_mode == 1:  # bilinear
        # for bilinear attention, hList1 == hList2 is assumed
        # shape: (cMBSize, encLen)
        aval = chaFunc.sum(target3 * aList, axis=2)
    elif self.attn_mode == 2:  # MLP
        # reshape so it can be fed to attnSum
        t1 = chaFunc.reshape(target3, (cMBSize * encLen, self.hDim))
        # (cMBSize*encLen, self.hDim) => (cMBSize*encLen, 1)
        t2 = self.model.attnSum(chaFunc.tanh(t1 + aList))
        # shape: (cMBSize, encLen)
        aval = chaFunc.reshape(t2, (cMBSize, encLen))
        # aval = chaFunc.reshape(self.model.attnSum(
        #     chaFunc.tanh(t1 + aList)), (cMBSize, encLen))
    else:
        assert 0, "ERROR"
    # 3, normalize the scores with softmax
    cAttn1 = chaFunc.softmax(aval)  # (cMBSize, encLen)
    # 4, compute the context vector from the attention weights
    # (cMBSize, encLen) => (cMBSize, 1, encLen)
    cAttn2 = chaFunc.expand_dims(cAttn1, axis=1)
    # (1, encLen) x (encLen, hDim) matrix product (matmul), repeated cMBSize times
    # => (cMBSize, 1, hDim)
    cAttn3 = chaFunc.batch_matmul(cAttn2, hList)
    # cAttn3 = chaFunc.batch_matmul(chaFunc.reshape(
    #     cAttn1, (cMBSize, 1, encLen)), hList)
    # squeeze the size-1 dimension at axis=1
    context = chaFunc.reshape(cAttn3, (cMBSize, self.hDim))
    # 4, alternative way of computing the context vector from the attention
    # weights using broadcasting, kept here for reference
    # (cMBSize, encLen) => (cMBSize, encLen, 1)
    # cAttn2 = chaFunc.reshape(cAttn1, (cMBSize, encLen, 1))
    # (cMBSize, encLen, 1) => (cMBSize, encLen, hDim)
    # cAttn3 = chaFunc.broadcast_to(cAttn2, (cMBSize, encLen, self.hDim))
    # weighted sum over (cMBSize, encLen, hDim)
    # => (cMBSize, hDim)  # summed over axis=1
    # context = chaFunc.sum(aList * cAttn3, axis=1)
    # 6, merge the context vector with the hidden state
    c1 = chaFunc.concat((h1, context))
    c2 = self.model.attnOut_L2(c1)
    finalH = chaFunc.tanh(c2)
    # finalH = chaFunc.tanh(self.model.attnOut_L2(
    #     chaFunc.concat((h1, context))))
    return finalH  # context