def _symmetric_matrix_square_root(mat, eps=1e-10):
    """Compute square root of a symmetric matrix.

    Note that this is different from an elementwise square root. We want to
    compute M' where M' = sqrt(mat) such that M' * M' = mat.

    Also note that this method **only** works for symmetric matrices.

    Args:
      mat: Matrix to take the square root of.
      eps: Small epsilon such that any element less than eps will not be square
        rooted to guard against numerical instability.

    Returns:
      Matrix square root of mat.
    """
    # Unlike numpy, tensorflow's return order is (s, u, v)
    s, u, v = tf.svd(mat)
    # sqrt is unstable around 0; pass near-zero values through unrooted
    si = tf.where(tf.less(s, eps), s, tf.sqrt(s))
    # Note that the v returned by Tensorflow is v = V
    # (when referencing the equation A = U S V^T)
    # This is unlike Numpy which returns v = V^T
    return tf.matmul(tf.matmul(u, tf.diag(si)), v, transpose_b=True)
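A minimal usage sketch (assuming TensorFlow 1.x, where tf.svd and tf.diag exist): the result should square back to the input on a small symmetric PSD matrix.

import numpy as np
import tensorflow as tf

with tf.Session() as sess:
    a = np.array([[2.0, 1.0], [1.0, 2.0]], dtype=np.float32)  # symmetric PSD
    root_val = sess.run(_symmetric_matrix_square_root(tf.constant(a)))
    # root @ root should recover the original matrix
    print(np.allclose(root_val @ root_val, a, atol=1e-5))  # True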
def _embed_sentences(self):
    """Tensorflow implementation of Simple but Tough-to-Beat Baseline"""
    # Get word features
    word_embeddings = self._get_embedding()
    word_feats = tf.nn.embedding_lookup(word_embeddings, self.input)
    # Get marginal estimates and scaling term
    batch_size = tf.shape(word_feats)[0]
    a = tf.pow(10.0, self._get_a_exp())
    p = tf.constant(self.marginals, dtype=tf.float32, name='marginals')
    q = tf.reshape(
        a / (a + tf.nn.embedding_lookup(p, self.input)),
        (batch_size, self.mx_len, 1)
    )
    # Compute initial sentence embedding
    z = tf.reshape(1.0 / tf.to_float(self.input_lengths), (batch_size, 1))
    S = z * tf.reduce_sum(q * word_feats, axis=1)
    # Compute common component
    S_centered = S - tf.reduce_mean(S, axis=0)
    _, _, V = tf.svd(S_centered, full_matrices=False, compute_uv=True)
    self.tf_ccx = tf.stop_gradient(tf.gather(tf.transpose(V), 0))
    # Common component removal
    ccx = tf.reshape(self._get_common_component(), (1, self.d))
    sv = {'embeddings': word_embeddings, 'a': a, 'p': p, 'ccx': ccx}
    return S - tf.matmul(S, ccx * tf.transpose(ccx)), sv
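The same SIF recipe in plain numpy, as a self-contained sketch not tied to the class above (assumes fixed-length sentences with no padding; `a` and the per-token frequency array `word_probs` are illustrative):

import numpy as np

def sif_embed(word_vecs, word_probs, a=1e-3):
    """word_vecs: (n_sents, max_len, d); word_probs: (n_sents, max_len)."""
    # Weight each word by a / (a + p(w)), then average per sentence
    w = a / (a + word_probs)                       # (n_sents, max_len)
    S = (w[..., None] * word_vecs).mean(axis=1)    # (n_sents, d)
    # Remove the common component: project out the top right singular vector
    _, _, vt = np.linalg.svd(S - S.mean(axis=0), full_matrices=False)
    ccx = vt[0]                                    # (d,)
    return S - np.outer(S @ ccx, ccx)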
def update_scipy_svd(self):
    sess = u.get_default_session()
    target0 = sess.run(self.target)
    # A = u.diag(s).v', singular vectors are columns
    # TODO: catch "ValueError: array must not contain infs or NaNs"
    try:
        u0, s0, vt0 = linalg.svd(target0)
        v0 = vt0.T
    except Exception as e:
        print("Got error %s" % (repr(e),))
        if DUMP_BAD_SVD:
            dump32(target0, "badsvd")
        print("gesdd failed, trying gesvd")
        u0, s0, vt0 = linalg.svd(target0, lapack_driver="gesvd")
        v0 = vt0.T
    feed_dict = {self.holder.u: u0,
                 self.holder.v: v0,
                 self.holder.s: s0}
    sess.run(self.update_external_op, feed_dict=feed_dict)
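The gesdd-to-gesvd fallback in isolation, as a standalone sketch: scipy's default divide-and-conquer driver (gesdd) occasionally fails to converge on matrices where the slower gesvd still succeeds.

import numpy as np
from scipy import linalg

def robust_svd(a):
    """SVD that falls back to the slower but sturdier gesvd driver."""
    try:
        return linalg.svd(a)                         # LAPACK gesdd (default)
    except linalg.LinAlgError:
        return linalg.svd(a, lapack_driver="gesvd")  # LAPACK gesvd fallback

u0, s0, vt0 = robust_svd(np.random.randn(4, 3))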
def get_value_updater(self, data, new_mean, gamma_weighted, gamma_sum):
    tf_new_differences = tf.subtract(data, tf.expand_dims(new_mean, 0))
    tf_sq_dist_matrix = tf.matmul(tf.expand_dims(tf_new_differences, 2),
                                  tf.expand_dims(tf_new_differences, 1))
    tf_new_covariance = tf.reduce_sum(
        tf_sq_dist_matrix * tf.expand_dims(tf.expand_dims(gamma_weighted, 1), 2), 0)
    if self.has_prior:
        tf_new_covariance = self.get_prior_adjustment(tf_new_covariance, gamma_sum)
    tf_s, tf_u, _ = tf.svd(tf_new_covariance)
    tf_required_eigvals = tf_s[:self.rank]
    tf_required_eigvecs = tf_u[:, :self.rank]
    tf_new_baseline = (tf.trace(tf_new_covariance)
                       - tf.reduce_sum(tf_required_eigvals)) / self.tf_rest
    tf_new_eigvals = tf_required_eigvals - tf_new_baseline
    tf_new_eigvecs = tf.transpose(tf_required_eigvecs)
    return tf.group(
        self.tf_baseline.assign(tf_new_baseline),
        self.tf_eigvals.assign(tf_new_eigvals),
        self.tf_eigvecs.assign(tf_new_eigvecs)
    )
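The low-rank-plus-isotropic decomposition used above, in plain numpy (a sketch assuming a symmetric PSD covariance; the leftover variance is spread over the remaining d - rank dimensions as a baseline):

import numpy as np

def low_rank_covariance(cov, rank):
    """Approximate cov as U_k diag(l_k) U_k^T + baseline * I."""
    d = cov.shape[0]
    s, u = np.linalg.eigh(cov)          # ascending eigenvalues
    s, u = s[::-1], u[:, ::-1]          # descending order, like tf.svd
    top_vals, top_vecs = s[:rank], u[:, :rank]
    # Residual variance smeared over the remaining dimensions
    baseline = (np.trace(cov) - top_vals.sum()) / (d - rank)
    return top_vals - baseline, top_vecs.T, baseline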
# eval_output_embeddings.py, from project almond-nnparser (author: Stanford-Mobisocial-IoT-Lab)
def pca_fit(X, n_components):
    mean = tf.reduce_mean(X, axis=0)
    centered_X = X - mean
    S, U, V = tf.svd(centered_X)
    # tf.svd returns V (not V^T as numpy does), so the principal axes are
    # V's columns; transpose before slicing off the top n_components rows
    return tf.transpose(V)[:n_components], mean
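Projecting data onto the fitted axes, as a usage sketch (`X` is any (n, d) float tensor):

components, mean = pca_fit(X, n_components=2)                  # (2, d), (d,)
X_reduced = tf.matmul(X - mean, components, transpose_b=True)  # (n, 2)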
def orthogonal(gain=1.0, dtype=np.float32):
    def _initializer(shape, dtype=dtype):
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")
        flat_shape = (shape[0], np.prod(shape[1:]))
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return np.array(gain * q, dtype=dtype)
    return _initializer
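A quick check of the initializer (sketch): for a 2D shape, the returned matrix has orthonormal rows when the first dimension is the smaller one.

import numpy as np

init = orthogonal(gain=1.0)
w = init((64, 256))
print(np.allclose(w @ w.T, np.eye(64), atol=1e-5))  # True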
def random_orthonormal_initializer(shape, dtype=tf.float32, partition_info=None):
    """Variable initializer that produces a random orthonormal matrix.

    Args:
      shape: shape of the variable

    Returns:
      random_orthogonal_matrix for initialization.
    """
    if len(shape) != 2 or shape[0] != shape[1]:
        raise ValueError("Expecting square shape, got %s" % shape)
    _, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True)
    return u
def random_orthonormal_initializer(shape, dtype=tf.float32, partition_info=None):  # pylint: disable=unused-argument
    """Variable initializer that produces a random orthonormal matrix."""
    if len(shape) != 2 or shape[0] != shape[1]:
        raise ValueError("Expecting square shape, got %s" % shape)
    _, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True)
    return u
def orthogonal_initializer():
    """Return an orthogonal initializer.

    A random orthogonal matrix is a byproduct of singular value decomposition
    applied to a matrix initialized with the normal distribution.

    The initializer works with 2D square matrices and matrices that can be
    split along axis 1 into several 2D matrices. In the latter case, each
    submatrix is initialized independently and the resulting orthogonal
    matrices are concatenated along axis 1.

    Note this is a higher order function in order to mimic the tensorflow
    initializer API.
    """
    # pylint: disable=unused-argument
    def func(shape, dtype, partition_info=None):
        if len(shape) != 2:
            raise ValueError(
                "Orthogonal initializer only works with 2D matrices.")
        if shape[1] % shape[0] != 0:
            raise ValueError("Shape {} is not compatible with orthogonal "
                             "initializer.".format(str(shape)))
        mult = shape[1] // shape[0]
        dim = shape[0]
        orthogonals = []
        for _ in range(mult):
            matrix = tf.random_normal([dim, dim], dtype=dtype)
            orthogonals.append(tf.svd(matrix)[1])  # u of tf.svd's (s, u, v)
        return tf.concat(orthogonals, 1)
    # pylint: enable=unused-argument
    return func
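Using it as a variable initializer, a sketch assuming TF 1.x variable scopes; func's (shape, dtype, partition_info) signature is exactly what tf.get_variable calls:

init = orthogonal_initializer()
w = tf.get_variable("w", shape=[128, 512], dtype=tf.float32,
                    initializer=init)  # four 128x128 orthogonal blocks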
def pseudo_inverse(mat, eps=1e-10):
    """Computes pseudo-inverse of mat, treating eigenvalues below eps as 0."""
    s, u, v = tf.svd(mat)
    # zero out reciprocals of near-zero eigenvalues
    si = tf.where(tf.less(s, eps), tf.zeros_like(s), 1. / s)
    return u @ tf.diag(si) @ tf.transpose(v)
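A quick Moore-Penrose sanity check, as a sketch (the u @ diag @ v^T form above assumes a symmetric input, so the test matrix is symmetric and rank-deficient): mat @ pinv @ mat should reproduce mat.

import numpy as np
import tensorflow as tf

with tf.Session() as sess:
    m = np.array([[1., 2.], [2., 4.]], dtype=np.float32)  # symmetric, rank 1
    pinv = sess.run(pseudo_inverse(tf.constant(m)))
    print(np.allclose(m @ pinv @ m, m, atol=1e-5))  # True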
def symsqrt(mat, eps=1e-7):
    """Symmetric square root."""
    s, u, v = tf.svd(mat)
    # sqrt is unstable around 0; pass near-zero values through unrooted
    print("Warning, cutting off at eps")
    si = tf.where(tf.less(s, eps), s, tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def pseudo_inverse_sqrt(mat, eps=1e-7):
    """Half pseudo-inverse: inverse square root via SVD."""
    s, u, v = tf.svd(mat)
    # zero threshold for eigenvalues
    si = tf.where(tf.less(s, eps), s, 1. / tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
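For a symmetric PSD matrix the "half" inverse squares back to the full pseudo-inverse, since applying U diag(s^-1/2) U^T twice gives U diag(s^-1) U^T. A sketch:

import numpy as np
import tensorflow as tf

with tf.Session() as sess:
    m = np.array([[4., 0.], [0., 9.]], dtype=np.float32)
    half = sess.run(pseudo_inverse_sqrt(tf.constant(m)))
    print(np.allclose(half @ half, np.linalg.pinv(m), atol=1e-5))  # True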
def pseudo_inverse_sqrt2(svd, eps=1e-7):
    """Half pseudo-inverse, accepting existing SVD values."""
    # zero threshold for eigenvalues
    if svd.__class__.__name__ in ('SvdTuple', 'SvdWrapper'):
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    si = tf.where(tf.less(s, eps), s, 1. / tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def pseudo_inverse2(svd, eps=1e-7):
    """Pseudo-inverse, accepting existing SVD values."""
    # use float32 machine precision as cut-off (works for MKL)
    # https://www.wolframcloud.com/objects/927b2aa5-de9c-46f5-89fe-c4a58aa4c04b
    if svd.__class__.__name__ in ('SvdTuple', 'SvdWrapper'):
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    max_eigen = tf.reduce_max(s)
    # relative cut-off: drop singular values below eps * max_eigen
    si = tf.where(s / max_eigen < eps, 0. * s, 1. / s)
    return u @ tf.diag(si) @ tf.transpose(v)
def regularized_inverse2(svd, L=1e-3):
    """Regularized inverse, working from SVD"""
    if svd.__class__.__name__ in ('SvdTuple', 'SvdWrapper'):
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    max_eigen = tf.reduce_max(s)
    # max_eigen = tf.Print(max_eigen, [max_eigen], "max_eigen")
    # si = 1/(s + L*tf.ones_like(s)/max_eigen)
    si = 1 / (s + L * tf.ones_like(s))
    return u @ tf.diag(si) @ tf.transpose(v)
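For a symmetric PSD matrix A = U diag(s) U^T this is exactly the Tikhonov-damped inverse (A + L*I)^-1, since U diag(1/(s+L)) U^T inverts U diag(s+L) U^T. A numpy check (sketch):

import numpy as np

a = np.array([[3., 1.], [1., 2.]])  # symmetric PSD
L = 1e-3
u0, s0, vt0 = np.linalg.svd(a)
reg_inv = u0 @ np.diag(1 / (s0 + L)) @ vt0
print(np.allclose(reg_inv, np.linalg.inv(a + L * np.eye(2))))  # True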
def regularized_inverse3(svd, L=1e-3):
    """Unbiased version of regularized_inverse2"""
    if svd.__class__.__name__ in ('SvdTuple', 'SvdWrapper'):
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    if L.__class__.__name__ == 'Var':
        L = L.var
    max_eigen = tf.reduce_max(s)
    # max_eigen = tf.Print(max_eigen, [max_eigen], "max_eigen")
    # si = 1/(s + L*tf.ones_like(s)/max_eigen)
    # (1+L)/(s+L) equals 1 at s=1, so unit eigenvalues pass through unchanged
    si = (1 + L * tf.ones_like(s)) / (s + L * tf.ones_like(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def regularized_inverse4(svd, L=1e-3):
    """Uses relative norm"""
    if svd.__class__.__name__ in ('SvdTuple', 'SvdWrapper'):
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    if L.__class__.__name__ == 'Var':
        L = L.var
    max_eigen = tf.reduce_max(s)
    L = L / max_eigen  # scale damping relative to the largest singular value
    si = (1 + L * tf.ones_like(s)) / (s + L * tf.ones_like(s))
    # si = tf.ones_like(s)
    return u @ tf.diag(si) @ tf.transpose(v)
def pseudo_inverse_stable(svd, eps=1e-7):
    """Pseudo-inverse with a softened power, accepting existing SVD values."""
    # use float32 machine precision as cut-off (works for MKL)
    # https://www.wolframcloud.com/objects/927b2aa5-de9c-46f5-89fe-c4a58aa4c04b
    if svd.__class__.__name__ in ('SvdTuple', 'SvdWrapper'):
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    max_eigen = tf.reduce_max(s)
    # s^-0.9 instead of s^-1 damps the blow-up of small singular values
    si = tf.where(s / max_eigen < eps, 0. * s, tf.pow(s, -0.9))
    return u @ tf.diag(si) @ tf.transpose(v)
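Why s^-0.9 is "stable", a numerical sketch: small singular values blow up far less than under a true inverse.

import numpy as np

s = np.array([1.0, 1e-2, 1e-4])
print(s ** -1.0)  # [1.0e+00 1.0e+02 1.0e+04]
print(s ** -0.9)  # [1.0e+00 6.3e+01 4.0e+03] (approximately)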