import numpy as np
import tensorflow as tf


def ones_matrix_band_part(rows, cols, num_lower, num_upper, out_shape=None):
    """Matrix band part of ones."""
    if all([isinstance(el, int) for el in [rows, cols, num_lower, num_upper]]):
        # Needed info is constant, so we construct in numpy
        if num_lower < 0:
            num_lower = rows - 1
        if num_upper < 0:
            num_upper = cols - 1
        # np.tri(cols, rows, ...) transposed keeps entries with i - j <= num_lower
        lower_mask = np.tri(cols, rows, num_lower).T
        upper_mask = np.tri(rows, cols, num_upper)
        band = np.ones((rows, cols)) * lower_mask * upper_mask
        if out_shape:
            band = band.reshape(out_shape)
        band = tf.constant(band, tf.float32)
    else:
        band = tf.matrix_band_part(tf.ones([rows, cols]),
                                   tf.cast(num_lower, tf.int64),
                                   tf.cast(num_upper, tf.int64))
        if out_shape:
            band = tf.reshape(band, out_shape)
    return band
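A quick sanity check of the constant path (a minimal sketch; assumes a TF1 session): with num_lower=-1 and num_upper=0 the band is the full lower triangle, i.e. a causal attention mask.

causal = ones_matrix_band_part(4, 4, -1, 0)
with tf.Session() as sess:
    print(sess.run(causal))
    # [[1. 0. 0. 0.]
    #  [1. 1. 0. 0.]
    #  [1. 1. 1. 0.]
    #  [1. 1. 1. 1.]]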
Python matrix_band_part() examples
def _tr_term(self, logits_arr, Np):
    """Get the TR reg term given a logits_arr consisting of Np
    different logits (number of classes = K) of transformations of batches
    of size B. This term is just the average squared distance between the
    logits of a pair of passes for a data point, averaged over the batch.
    See https://papers.nips.cc/paper/6333-regularization-with-stochastic-
    transformations-and-perturbations-for-deep-semi-supervised-learning.pdf
    """
    # Reshape to [B, Np, K]
    A = tf.transpose(logits_arr.stack(), [1, 0, 2])
    # ||a_i||_2^2; note element-wise multiply here
    R = tf.reshape(tf.reduce_sum(A * A, 2), [-1, Np, 1])
    # ||a_j||_2^2
    R_t = tf.transpose(R, [0, 2, 1])
    # <a_i, a_j>
    S = tf.matmul(A, tf.transpose(A, [0, 2, 1]))
    # Pairwise squared-distance matrix ||a_i - a_j||^2
    D = R - 2 * S + R_t
    # Lower triangular part (don't double count)
    D_lt = tf.matrix_band_part(D, -1, 0)
    # Take mean over distinct pairs & the batch
    return tf.reduce_mean(tf.reduce_sum(D_lt, axis=2))
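The D above uses the expansion ||a_i - a_j||^2 = ||a_i||^2 - 2<a_i, a_j> + ||a_j||^2. A small NumPy check of that identity (illustrative only; not part of the original file):

import numpy as np

A = np.random.randn(2, 5, 3)                  # [B, Np, K]
R = np.sum(A * A, axis=2)[:, :, None]         # ||a_i||^2 as [B, Np, 1]
S = A @ A.transpose(0, 2, 1)                  # <a_i, a_j> as [B, Np, Np]
D = R - 2 * S + R.transpose(0, 2, 1)
D_ref = np.sum((A[:, :, None, :] - A[:, None, :, :]) ** 2, axis=3)
assert np.allclose(D, D_ref)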
From a2_multi_head_attention.py, project text_classification (author: brightmart):
def get_mask(batch_size, sequence_length):
    # batch_size is unused: the [length, length] mask broadcasts over the batch
    lower_triangle = tf.matrix_band_part(tf.ones([sequence_length, sequence_length]), -1, 0)
    result = -1e9 * (1.0 - lower_triangle)
    print("get_mask==>result:", result)
    return result
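The returned matrix is added to raw attention scores before the softmax; positions above the diagonal sit at roughly -1e9 and get near-zero attention weight. A minimal usage sketch (the scores tensor here is hypothetical):

import tensorflow as tf

scores = tf.random_normal([8, 10, 10])  # hypothetical [batch, length, length] scores
mask = get_mask(8, 10)                  # [length, length], broadcasts over the batch
weights = tf.nn.softmax(scores + mask)  # future positions get ~zero weight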
#multi_head_attention_for_sentence_vectorized(0)
def get_mask(self, sequence_length):
    lower_triangle = tf.matrix_band_part(tf.ones([sequence_length, sequence_length]), -1, 0)
    result = -1e9 * (1.0 - lower_triangle)
    print("get_mask==>result:", result)
    return result
# test started: learn to output reverse sequence of itself.
From a2_transformer_classification.py, project text_classification (author: brightmart):
def get_mask(self, sequence_length):
    lower_triangle = tf.matrix_band_part(tf.ones([sequence_length, sequence_length]), -1, 0)
    result = -1e9 * (1.0 - lower_triangle)
    print("get_mask==>result:", result)
    return result
# test started: learn to predict the bigger number in two numbers from specific location of array.
def get_mask(sequence_length):
    lower_triangle = tf.matrix_band_part(tf.ones([sequence_length, sequence_length]), -1, 0)
    result = -1e9 * (1.0 - lower_triangle)
    print("get_mask==>result:", result)
    return result
def get_L_sym(self, L_vec_var):
    # Reshape the flat vector into a batch of [action_dim x action_dim] matrices,
    # keep the strictly-lower triangle, and exponentiate the diagonal so the
    # result is a valid Cholesky factor (positive diagonal entries).
    L = tf.reshape(L_vec_var, (-1, self._action_dim, self._action_dim))
    return tf.matrix_band_part(L, -1, 0) - \
        tf.matrix_diag(tf.matrix_diag_part(L)) + \
        tf.matrix_diag(tf.exp(tf.matrix_diag_part(L)))
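A quick check of the construction on one hand-picked matrix (a standalone sketch; TF1 session assumed): strictly-lower entries pass through, the diagonal is exponentiated, and anything above the diagonal is dropped.

import tensorflow as tf

raw = tf.constant([[[1.0, 5.0], [2.0, -1.0]]])  # one 2x2 matrix, batch of 1
L = tf.matrix_band_part(raw, -1, 0) \
    - tf.matrix_diag(tf.matrix_diag_part(raw)) \
    + tf.matrix_diag(tf.exp(tf.matrix_diag_part(raw)))
with tf.Session() as sess:
    print(sess.run(L))  # [[[2.718, 0.], [2., 0.368]]] -- the 5.0 above the diagonal is gone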
def _build_likelihood(self):
    """
    This method computes the variational lower bound on the likelihood,
    which is:
        E_{q(F)} [ \\log p(Y|F) ] - KL[ q(F) || p(F)]
    with
        q(\\mathbf f) = N(\\mathbf f \\,|\\, \\boldsymbol \\mu, \\boldsymbol \\Sigma)
    """
    # Get prior KL.
    KL = gauss_kl(self.q_mu, self.q_sqrt)

    # Get conditionals
    K = self.kern.K(self.X) + tf.eye(self.num_data, dtype=settings.float_type) * \
        settings.numerics.jitter_level
    L = tf.cholesky(K)
    fmean = tf.matmul(L, self.q_mu) + self.mean_function(self.X)  # NN,ND->ND
    q_sqrt_dnn = tf.matrix_band_part(tf.transpose(self.q_sqrt, [2, 0, 1]), -1, 0)  # D x N x N
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.stack([self.num_latent, 1, 1]))
    LTA = tf.matmul(L_tiled, q_sqrt_dnn)  # D x N x N
    fvar = tf.reduce_sum(tf.square(LTA), 2)
    fvar = tf.transpose(fvar)

    # Get variational expectations.
    var_exp = self.likelihood.variational_expectations(fmean, fvar, self.Y)
    return tf.reduce_sum(var_exp) - KL
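Under this whitened parameterisation f = L v with q(v) = N(mu, S S^T), so cov(f) = L S S^T L^T and the marginal variances are the row sums of squares of L S, which is exactly what the fvar lines compute. A NumPy check (illustrative only):

import numpy as np

N = 4
L = np.tril(np.random.randn(N, N))        # Cholesky factor of the prior K
S = np.tril(np.random.randn(N, N))        # q_sqrt for a single latent dim
full_cov = L @ S @ S.T @ L.T              # covariance of f = L v
marginals = np.sum((L @ S) ** 2, axis=1)  # row sums of squares of L S
assert np.allclose(marginals, np.diag(full_cov))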
def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
    # compute kernel stuff
    num_func = tf.shape(f)[1]  # K
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.matmul(A, A, transpose_a=True)
        shape = tf.stack([num_func, 1, 1])
    else:
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        shape = tf.stack([num_func, 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # K x N x N or K x N

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # K x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # K x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # K x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # K x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # K x N
    fvar = tf.transpose(fvar)  # N x K or N x N x K

    return fmean, fvar
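A minimal sketch of calling base_conditional on toy Gaussian-process quantities (the hand-rolled RBF kernel and the inputs are made up for this example; TF1 session assumed):

import numpy as np
import tensorflow as tf

def rbf(a, b):
    # squared-exponential kernel, unit lengthscale and variance
    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2)

Z = np.linspace(-1.0, 1.0, 3)       # M = 3 inducing inputs
X = np.linspace(-2.0, 2.0, 5)       # N = 5 test inputs
Kmm = rbf(Z, Z) + 1e-6 * np.eye(3)  # jitter for a stable Cholesky
Kmn = rbf(Z, X)
Knn = np.diag(rbf(X, X))            # marginal variances only, since full_cov=False
f = np.random.randn(3, 1)           # K = 1 latent function

fmean, fvar = base_conditional(tf.constant(Kmn), tf.constant(Kmm),
                               tf.constant(Knn), tf.constant(f), white=True)
with tf.Session() as sess:
    print(sess.run([fmean, fvar]))  # both N x K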
def _build_q_head(self, input_state):
    self.w_value, self.b_value, self.value = layers.fc('fc_value', input_state, 1, activation='linear')
    # num_actions ** 2 outputs so L_full can be reshaped into one
    # [num_actions x num_actions] matrix per batch element
    self.w_L, self.b_L, self.L_full = layers.fc('L_full', input_state, self.num_actions ** 2, activation='linear')
    self.w_mu, self.b_mu, self.mu = layers.fc('mu', input_state, self.num_actions, activation='linear')

    L_full = tf.reshape(self.L_full, [-1, self.num_actions, self.num_actions])
    # elements above the main diagonal in L_full are unused; the diagonal is
    # exponentiated so L is a valid Cholesky factor with a positive diagonal
    D = tf.matrix_band_part(tf.exp(L_full) - L_full, 0, 0)
    L = tf.matrix_band_part(L_full, -1, 0) + D

    # NAF-style advantage: A(s, u) = -0.5 * (u - mu)^T L L^T (u - mu)
    LT_u_minus_mu = tf.einsum('ikj,ik->ij', L, self.selected_action_ph - self.mu)
    self.advantage = -0.5 * tf.reduce_sum(tf.square(LT_u_minus_mu), axis=1)

    # value has shape [batch, 1]; squeeze so it adds elementwise to the advantage
    q_selected_action = tf.squeeze(self.value, axis=1) + self.advantage
    diff = tf.subtract(self.target_ph, q_selected_action)
    return self._value_function_loss(diff)
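The einsum computes L^T (u - mu) per batch element, so the advantage equals -0.5 (u - mu)^T L L^T (u - mu). A NumPy check of that equivalence (illustrative only):

import numpy as np

L = np.tril(np.random.randn(2, 3, 3))          # batch of lower-triangular factors
u_minus_mu = np.random.randn(2, 3)
LT_u = np.einsum('ikj,ik->ij', L, u_minus_mu)  # L^T (u - mu), per batch element
adv = -0.5 * np.sum(LT_u ** 2, axis=1)
P = np.einsum('ijk,ilk->ijl', L, L)            # P = L L^T
adv_ref = -0.5 * np.einsum('ij,ijk,ik->i', u_minus_mu, P, u_minus_mu)
assert np.allclose(adv, adv_ref)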
def predict(self, answer, start_logits, end_logits, mask) -> Prediction:
    l = tf.shape(start_logits)[1]
    masked_start_logits = exp_mask(start_logits, mask)
    masked_end_logits = exp_mask(end_logits, mask)

    # Explicit score for each span
    span_scores = tf.expand_dims(start_logits, 2) + tf.expand_dims(end_logits, 1)

    # Mask for in-bound spans, now (batch, start, end) matrix
    mask = tf.sequence_mask(mask, l)
    mask = tf.logical_and(tf.expand_dims(mask, 2), tf.expand_dims(mask, 1))
    # Also mask out spans that are negative/inverse by taking only the upper triangle
    mask = tf.matrix_band_part(mask, 0, self.bound)

    # Apply the mask
    mask = tf.cast(mask, tf.float32)
    span_scores = span_scores * mask + (1 - mask) * VERY_NEGATIVE_NUMBER

    if len(answer) == 1:
        answer = answer[0]
        span_scores = tf.reshape(span_scores, (tf.shape(start_logits)[0], -1))
        answer = answer[:, 0] * l + answer[:, 1]
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=span_scores, labels=answer)
        loss = tf.reduce_mean(losses)
    else:
        raise NotImplementedError()

    tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    return BoundaryPrediction(tf.nn.softmax(masked_start_logits),
                              tf.nn.softmax(masked_end_logits),
                              masked_start_logits, masked_end_logits, mask)
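The band-part call is what restricts candidates to spans with start <= end <= start + bound. For a length-5 sequence and bound = 2, the (start, end) validity pattern looks like this (a standalone sketch; TF1 session assumed):

import tensorflow as tf

valid = tf.matrix_band_part(tf.ones([5, 5]), 0, 2)
with tf.Session() as sess:
    print(sess.run(valid))
    # [[1. 1. 1. 0. 0.]
    #  [0. 1. 1. 1. 0.]
    #  [0. 0. 1. 1. 1.]
    #  [0. 0. 0. 1. 1.]
    #  [0. 0. 0. 0. 1.]]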
def attention_bias(inputs, mode, inf=-1e9, name=None):
    """ A bias tensor used in attention mechanism
    :param inputs: a length scalar ("causal"/"proximal"), a mask tensor
        ("masking"), or a (length, distance) pair ("distance")
    :param mode: one of "causal", "masking", "proximal" or "distance"
    :param inf: the large negative value used to mask out positions
    :param name: an optional name for the op
    :returns: a bias tensor to add to the attention logits
    """

    with tf.name_scope(name, default_name="attention_bias", values=[inputs]):
        if mode == "causal":
            length = inputs
            lower_triangle = tf.matrix_band_part(
                tf.ones([length, length]), -1, 0
            )
            ret = inf * (1.0 - lower_triangle)
            return tf.reshape(ret, [1, 1, length, length])
        elif mode == "masking":
            mask = inputs
            ret = (1.0 - mask) * inf
            return tf.expand_dims(tf.expand_dims(ret, 1), 1)
        elif mode == "proximal":
            length = inputs
            r = tf.to_float(tf.range(length))
            diff = tf.expand_dims(r, 0) - tf.expand_dims(r, 1)
            m = tf.expand_dims(tf.expand_dims(-tf.log(1 + tf.abs(diff)), 0), 0)
            return m
        elif mode == "distance":
            length, distance = inputs
            distance = tf.where(distance > length, 0, distance)
            distance = tf.cast(distance, tf.int64)
            lower_triangle = tf.matrix_band_part(
                tf.ones([length, length]), -1, 0
            )
            mask_triangle = 1.0 - tf.matrix_band_part(
                tf.ones([length, length]), distance - 1, 0
            )
            ret = inf * (1.0 - lower_triangle + mask_triangle)
            return tf.reshape(ret, [1, 1, length, length])
        else:
            raise ValueError("Unknown mode %s" % mode)
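Typical use of the "causal" mode adds the returned bias to the raw attention logits before the softmax; the [1, 1, length, length] shape broadcasts over batch and heads (a minimal sketch; the logits tensor is hypothetical):

length = 10
bias = attention_bias(length, "causal")            # [1, 1, length, length]
logits = tf.random_normal([8, 4, length, length])  # [batch, heads, query, key]
weights = tf.nn.softmax(logits + bias)             # no attention to future positions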