import math

import tensorflow as tf

def diagonal_bilinear_attention(seq1, seq2, len2, scaled=True, with_sentinel=True):
    # Diagonal bilinear term: a per-dimension weight vector v stands in for a full
    # bilinear matrix, so score[b, i, j] = sum_c v[c] * seq1[b, i, c] * seq2[b, j, c].
    v = tf.get_variable('attn_weight', [1, 1, seq1.get_shape()[-1].value], tf.float32,
                        initializer=tf.ones_initializer())
    attn_scores = tf.einsum('abc,adc->abd', v * seq1, seq2)
    # Additive linear terms for each sequence, broadcast along the opposite axis.
    attn_scores += tf.layers.dense(seq1, 1, use_bias=False)
    attn_scores += tf.transpose(tf.layers.dense(seq2, 1, use_bias=False), [0, 2, 1])
    if scaled:
        # Scale by sqrt(d), as in scaled dot-product attention.
        attn_scores /= math.sqrt(float(seq1.get_shape()[-1].value))
    return apply_attention(attn_scores, seq2, len2, seq1 is seq2, with_sentinel)
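For reference, a minimal usage sketch under TF 1.x graph mode follows. The placeholder shapes are illustrative assumptions, and `apply_attention` is assumed to be defined elsewhere in this post (masking scores by `len2` and optionally prepending a sentinel); neither is part of the snippet above.

# Minimal usage sketch (assumptions: TF 1.x graph mode, apply_attention defined elsewhere).
seq1 = tf.placeholder(tf.float32, [None, 10, 64])   # [batch, len1, dim]
seq2 = tf.placeholder(tf.float32, [None, 12, 64])   # [batch, len2, dim]
len2 = tf.placeholder(tf.int32, [None])             # valid length of seq2 per batch element
attended = diagonal_bilinear_attention(seq1, seq2, len2)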