def multilinear(emb, tuples, l2=0):
    """
    Score tuples by the multilinear dot product of their selected embeddings.

    This model is known as Canonical Parafac (CP) and corresponds to the "distmult" model in some scientific
    publications on relational database factorization.

    :param emb: embedding matrix of size [n_emb, rank] containing float numbers
    :param tuples: tuple matrix of size [n_t, arity] containing integers, used as row indices into emb
    :param l2: optional l2 regularization strength. When it equals 0 (the default) only the predictions are
        returned; otherwise a pair (pred, reg) is returned
    :return: pred, the multilinear dot product between selected embeddings S[i] = sum_j prod_k E[I[i,k],j];
        or the pair (pred, reg) when l2 is different from 0, where reg is l2 times the sum of the squared
        entries of all selected embeddings
    >>> embeddings = [[1., 1, 0, 3], [0, 1, 0, 1], [-1, 1, 1, 5]]
    >>> idx = tf.Variable([[0, 1], [1, 0], [0, 2], [2, 0], [1, 2], [2, 1]])
    >>> g = multilinear(embeddings, idx)
    >>> print(tf_eval(g))
    [ 4. 4. 15. 15. 6. 6.]
    """
    # Look up one embedding row per index; with the documented shapes this is [n_t, arity, rank].
    selected = tf.gather(emb, tuples)

    # Multiply across the tuple members (axis 1), then sum the remaining coordinates (axis 1 again).
    coordinate_products = tf.reduce_prod(selected, 1)
    scores = tf.reduce_sum(coordinate_products, 1)

    # Unregularized prediction: return only the scores.
    if l2 == 0:
        return scores

    # Regularized case: penalize the squared magnitude of the embeddings that were actually selected.
    squared_sum = tf.reduce_sum(tf.square(selected))
    penalty = l2 * squared_sum
    return scores, penalty
评论列表
文章目录