scoring.py 文件源码-python代码片段

def multilinear(emb, tuples, l2=0):
    """
    Compute the dot product of real vectors at selected embeddings
    Note that this model is called Cannonical Parafac (CP), and corresponds to the "distmult" model in some scientific
    publications on relational database factorization.
    :param emb: embedding matrix of size [n_emb, rank] containing float numbers
    :param tuples: tuple matrix of size [n_t, arity] containing integers
    :param l2: optional l2 regularization strength that is added to the score. If it is different from 0, the function
    returns a pair (pred, l2norm) where pred is the sample prediction, but l2norm is the l2 norm of the selected
    embeddings
    :return: the multilinear dot product between selected embeddings S[i] = sum_j prod_k E[I[i,k],j]

    >>> embeddings = [[1., 1, 0, 3], [0, 1, 0, 1], [-1, 1, 1, 5]]
    >>> idx = tf.Variable([[0, 1], [1, 0], [0, 2], [2, 0], [1, 2], [2, 1]])
    >>> g = multilinear(embeddings, idx)
    >>> print(tf_eval(g))
    [  4.   4.  15.  15.   6.   6.]
    """
    emb_sel = tf.gather(emb, tuples)

    pred = tf.reduce_sum(tf.reduce_prod(emb_sel, 1), 1)
    if l2 == 0:  # unregularized prediction ==> returns only the predictions
        return pred
    else:  # l2 regularization of the selected embeddings
        reg = l2 * tf.reduce_sum(tf.square(emb_sel))
        return pred, reg