def update_link_matrix(self, link_matrix_old, precedence_weighting_old, write_weighting):
"""
Updating the link matrix takes some effort to vectorize: instead of the paper's
index-by-index operation, the whole matrix is updated at once.
:param link_matrix_old: from previous time step, shape [batch_size, memory_size, memory_size]
:param precedence_weighting_old: from previous time step, shape [batch_size, memory_size]
:param write_weighting: from current time step, shape [batch_size, memory_size]
:return: updated link matrix
"""
expanded = tf.expand_dims(write_weighting, axis=2)
# vectorizing the paper's original implementation
w = tf.tile(expanded, [1, 1, self.memory_size]) # shape [batch_size, memory_size, memory_size]
# shape of w_transpose is the same: [batch_size, memory_size, memory_size]
w_transp = tf.tile(tf.transpose(expanded, [0, 2, 1]), [1, self.memory_size, 1])
    # in the einsum, m and n index the same (memory_size) dimension; separate letters are needed because TensorFlow's einsum doesn't support duplicated subscripts
lm = (1 - w - w_transp) * link_matrix_old + tf.einsum("bn,bm->bmn", precedence_weighting_old, write_weighting)
lm *= (1 - tf.eye(self.memory_size, batch_shape=[self.batch_size])) # making sure self links are off
return tf.identity(lm, name="Link_matrix")
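# A minimal NumPy sketch (standalone; names are illustrative, not part of the class
# above) checking that the vectorized tile/einsum expression matches the element-wise
# rule it implements for a single batch element:
#   L[i, j] = (1 - w[i] - w[j]) * L_old[i, j] + w[i] * p_old[j],   with L[i, i] = 0.
import numpy as np

def link_matrix_loop(L_old, p_old, w):
    N = len(w)
    L = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            if i != j:
                L[i, j] = (1 - w[i] - w[j]) * L_old[i, j] + w[i] * p_old[j]
    return L

def link_matrix_vectorized(L_old, p_old, w):
    L = (1 - w[:, None] - w[None, :]) * L_old + np.outer(w, p_old)
    return L * (1 - np.eye(len(w)))  # zero the self-links, as with tf.eye above

rng = np.random.RandomState(0)
N = 4
L_old = rng.rand(N, N) * (1 - np.eye(N))
p_old, w = rng.rand(N), rng.rand(N)
assert np.allclose(link_matrix_loop(L_old, p_old, w),
                   link_matrix_vectorized(L_old, p_old, w))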
def test_whiten(self):
"""
        Make sure that predicting using the whitened representation is the
        same as predicting with the non-whitened one.
"""
with self.test_context() as sess:
Xs, X, F, k, num_data, feed_dict = self.prepare()
k.compile(session=sess)
K = k.K(X) + tf.eye(num_data, dtype=settings.float_type) * 1e-6
L = tf.cholesky(K)
V = tf.matrix_triangular_solve(L, F, lower=True)
Fstar_mean, Fstar_var = gpflow.conditionals.conditional(Xs, X, k, F)
Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(Xs, X, k, V, white=True)
mean1, var1 = sess.run([Fstar_w_mean, Fstar_w_var], feed_dict=feed_dict)
mean2, var2 = sess.run([Fstar_mean, Fstar_var], feed_dict=feed_dict)
# TODO: should tolerance be type dependent?
assert_allclose(mean1, mean2)
assert_allclose(var1, var2)
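# A small standalone NumPy sketch (hypothetical names, independent of the test above)
# of why the whitened and non-whitened conditionals agree: with K = L L^T and the
# whitened representation V = L^-1 F, the predictive mean Kxs^T K^-1 F equals
# (L^-1 Kxs)^T V.
import numpy as np

rng = np.random.RandomState(0)
N, M = 5, 3
A = rng.randn(N, N)
K = A @ A.T + 1e-6 * np.eye(N)           # plays the role of k.K(X)
Kxs = rng.randn(N, M)                    # plays the role of k.K(X, Xs)
F = rng.randn(N, 1)
L = np.linalg.cholesky(K)
V = np.linalg.solve(L, F)                # whitened representation
mean_plain = Kxs.T @ np.linalg.solve(K, F)
mean_white = np.linalg.solve(L, Kxs).T @ V
assert np.allclose(mean_plain, mean_white)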
def _build_predict(self, Xnew, full_cov=False):
"""
Xnew is a data matrix of points at which we want to predict.
This method computes
p(F* | Y)
where F* are points on the GP at Xnew, and Y are noisy observations at X.
"""
Kx = self.kern.K(self.X, Xnew)
K = self.kern.K(self.X) + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * self.likelihood.variance
L = tf.cholesky(K)
A = tf.matrix_triangular_solve(L, Kx, lower=True)
V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
fmean = tf.matmul(A, V, transpose_a=True) + self.mean_function(Xnew)
if full_cov:
fvar = self.kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
else:
fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
return fmean, fvar
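# A brief NumPy sketch (standalone, names hypothetical) of the variance term above:
# with A = L^-1 Kx and K = L L^T, the product A^T A equals Kx^T K^-1 Kx, so
# fvar = K(Xnew) - A^T A is the usual GP posterior covariance
# K(Xnew) - Kx^T (K(X) + sigma^2 I)^-1 Kx.
import numpy as np

rng = np.random.RandomState(0)
N, M, noise = 6, 4, 0.1
B = rng.randn(N, N)
K = B @ B.T + noise * np.eye(N)          # K(X, X) + likelihood variance * I
Kx = rng.randn(N, M)                     # K(X, Xnew)
L = np.linalg.cholesky(K)
A = np.linalg.solve(L, Kx)
assert np.allclose(A.T @ A, Kx.T @ np.linalg.solve(K, Kx))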
def __init__(self, X, Y, kern, likelihood,
mean_function=None,
num_latent=None,
**kwargs):
"""
X is a data matrix, size N x D
Y is a data matrix, size N x R
kern, likelihood, mean_function are appropriate GPflow objects
"""
X = DataHolder(X)
Y = DataHolder(Y)
GPModel.__init__(self, X, Y, kern, likelihood, mean_function, **kwargs)
self.num_data = X.shape[0]
self.num_latent = num_latent or Y.shape[1]
self.q_mu = Parameter(np.zeros((self.num_data, self.num_latent)))
q_sqrt = np.array([np.eye(self.num_data)
for _ in range(self.num_latent)]).swapaxes(0, 2)
transform = transforms.LowerTriangular(self.num_data, self.num_latent)
self.q_sqrt = Parameter(q_sqrt, transform=transform)
def gather_indices_2d(x, block_shape, block_stride):
"""Getting gather indices."""
# making an identity matrix kernel
kernel = tf.eye(block_shape[0] * block_shape[1])
kernel = reshape_range(kernel, 0, 1, [block_shape[0], block_shape[1], 1])
  # making indices [1, h, w, 1] to apply convs
x_shape = common_layers.shape_list(x)
indices = tf.range(x_shape[2] * x_shape[3])
indices = tf.reshape(indices, [1, x_shape[2], x_shape[3], 1])
indices = tf.nn.conv2d(
tf.cast(indices, tf.float32),
kernel,
strides=[1, block_stride[0], block_stride[1], 1],
padding="VALID")
# making indices [num_blocks, dim] to gather
dims = common_layers.shape_list(indices)[:3]
if all([isinstance(dim, int) for dim in dims]):
num_blocks = functools.reduce(operator.mul, dims, 1)
else:
num_blocks = tf.reduce_prod(dims)
indices = tf.reshape(indices, [num_blocks, -1])
return tf.cast(indices, tf.int32)
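# A rough NumPy equivalent (an illustrative sketch, not part of the library above) of
# what the identity-kernel convolution computes: for each block position it collects
# the flat indices of that block's pixels, row-major within the block.
import numpy as np

def gather_indices_2d_np(h, w, block_shape, block_stride):
    flat = np.arange(h * w).reshape(h, w)
    blocks = []
    for i in range(0, h - block_shape[0] + 1, block_stride[0]):
        for j in range(0, w - block_shape[1] + 1, block_stride[1]):
            blocks.append(flat[i:i + block_shape[0], j:j + block_shape[1]].ravel())
    return np.stack(blocks).astype(np.int32)   # shape [num_blocks, bh * bw]

# e.g. a 4x4 map with non-overlapping 2x2 blocks:
# gather_indices_2d_np(4, 4, (2, 2), (2, 2))[0] -> [0, 1, 4, 5]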
def normal_sample(mean, var, full_cov=False):
if full_cov is False:
z = tf.random_normal(tf.shape(mean), dtype=float_type)
return mean + z * var ** 0.5
else:
S, N, D = shape_as_list(mean) # var is SNND
mean = tf.transpose(mean, (0, 2, 1)) # SND -> SDN
var = tf.transpose(var, (0, 3, 1, 2)) # SNND -> SDNN
# I = jitter * tf.eye(N, dtype=float_type)[None, None, :, :] # 11NN
        chol = tf.cholesky(var)  # (+ I) SDNN; should be OK without extra jitter as var already includes it
z = tf.random_normal([S, D, N, 1], dtype=float_type)
f = mean + tf.matmul(chol, z)[:, :, :, 0] # SDN(1)
return tf.transpose(f, (0, 2, 1)) # SND
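# A small standalone NumPy sketch (illustrative only) of the full-covariance branch:
# sampling mean + chol(var) @ z with z ~ N(0, I) produces samples whose covariance
# is approximately var.
import numpy as np

rng = np.random.RandomState(0)
N, S = 3, 200000
B = rng.randn(N, N)
var = B @ B.T + 1e-6 * np.eye(N)
mean = rng.randn(N)
chol = np.linalg.cholesky(var)
z = rng.randn(N, S)
samples = mean[:, None] + chol @ z
assert np.allclose(np.cov(samples), var, atol=0.1)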
def generate_y(self, y_labeled):
y_unlabeled_tiled = tf.reshape(tf.tile(tf.eye(self._num_classes), [1, self._batch_size]),
[self._num_classes * self._batch_size, self._num_classes])
y_all = tf.concat([y_labeled, y_unlabeled_tiled], 0)
return y_all, y_unlabeled_tiled
def eye(n, m):
return tf.eye(n, m)
def compute_moments(_inputs, moments=[2, 3]):
"""From an image input, compute moments"""
_inputs_sq = tf.square(_inputs)
_inputs_cube = tf.pow(_inputs, 3)
height = int(_inputs.get_shape()[1])
width = int(_inputs.get_shape()[2])
channels = int(_inputs.get_shape()[3])
def ConvFlatten(x, kernel_size):
# w_sum = tf.ones([kernel_size, kernel_size, channels, 1]) / (kernel_size * kernel_size * channels)
w_sum = tf.eye(num_rows=channels, num_columns=channels, batch_shape=[kernel_size * kernel_size])
w_sum = tf.reshape(w_sum, [kernel_size, kernel_size, channels, channels])
w_sum = w_sum / (kernel_size * kernel_size)
sum_ = tf.nn.conv2d(x, w_sum, strides=[1, 1, 1, 1], padding='VALID')
size = prod_dim(sum_)
assert size == (height - kernel_size + 1) * (width - kernel_size + 1) * channels, size
return tf.reshape(sum_, [-1, size])
outputs = []
for size in [3, 4, 5]:
mean = ConvFlatten(_inputs, size)
square = ConvFlatten(_inputs_sq, size)
var = square - tf.square(mean)
if 2 in moments:
outputs.append(var)
if 3 in moments:
cube = ConvFlatten(_inputs_cube, size)
skewness = cube - 3.0 * mean * var - tf.pow(mean, 3) # Unnormalized
outputs.append(skewness)
return tf.concat(outputs, 1)
def _build_likelihood(self):
"""
Construct a tensorflow function to compute the likelihood.
\log p(Y | theta).
"""
K = self.kern.K(self.X) + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * self.likelihood.variance
L = tf.cholesky(K)
m = self.mean_function(self.X)
return multivariate_normal(self.Y, m, L)
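# A quick standalone sanity check (hypothetical data; SciPy only supplies the reference
# value) that a Cholesky-parameterised Gaussian log-density, log N(y | m, L L^T),
# matches the direct computation; this is the quantity the likelihood above is built from.
import numpy as np
from scipy.stats import multivariate_normal as mvn

rng = np.random.RandomState(0)
N = 6
A = rng.randn(N, N)
K = A @ A.T + 1e-6 * np.eye(N)
m, y = rng.randn(N), rng.randn(N)
L = np.linalg.cholesky(K)
alpha = np.linalg.solve(L, y - m)
logp = -0.5 * alpha @ alpha - np.log(np.diag(L)).sum() - 0.5 * N * np.log(2 * np.pi)
assert np.allclose(logp, mvn(mean=m, cov=K).logpdf(y))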
def compile(self, session=None):
"""
Before calling the standard compile function, check to see if the size
of the data has changed and add variational parameters appropriately.
This is necessary because the shape of the parameters depends on the
shape of the data.
"""
if not self.num_data == self.X.shape[0]:
self.num_data = self.X.shape[0]
self.q_mu = Parameter(np.zeros((self.num_data, self.num_latent)))
self.q_sqrt = Parameter(np.eye(self.num_data)[:, :, None] *
np.ones((1, 1, self.num_latent)))
return super(VGP, self).compile(session=session)
def _build_likelihood(self):
"""
q_alpha, q_lambda are variational parameters, size N x R
This method computes the variational lower bound on the likelihood,
which is:
E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]
with
q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
"""
K = self.kern.K(self.X)
K_alpha = tf.matmul(K, self.q_alpha)
f_mean = K_alpha + self.mean_function(self.X)
# compute the variance for each of the outputs
I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
[self.num_latent, 1, 1])
A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
L = tf.cholesky(A)
Li = tf.matrix_triangular_solve(L, I)
tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))
# some statistics about A are used in the KL
A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
trAi = tf.reduce_sum(tf.square(Li))
KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
tf.reduce_sum(K_alpha * self.q_alpha))
v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
return tf.reduce_sum(v_exp) - KL
def _build_likelihood(self):
"""
Construct a tf function to compute the likelihood of a general GP
model.
\log p(Y, V | theta).
"""
K = self.kern.K(self.X)
L = tf.cholesky(
K + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * settings.numerics.jitter_level)
F = tf.matmul(L, self.V) + self.mean_function(self.X)
return tf.reduce_sum(self.likelihood.logp(F, self.Y))
def predict_f_samples(self, Xnew, num_samples):
"""
Produce samples from the posterior latent function(s) at the points
Xnew.
"""
mu, var = self._build_predict(Xnew, full_cov=True)
jitter = tf.eye(tf.shape(mu)[0], dtype=settings.float_type) * settings.numerics.jitter_level
samples = []
for i in range(self.num_latent):
L = tf.cholesky(var[:, :, i] + jitter)
shape = tf.stack([tf.shape(L)[0], num_samples])
V = tf.random_normal(shape, dtype=settings.float_type)
samples.append(mu[:, i:i + 1] + tf.matmul(L, V))
return tf.transpose(tf.stack(samples))
def _build_likelihood(self):
"""
Construct a tensorflow function to compute the bound on the marginal
likelihood. For a derivation of the terms in here, see the associated
SGPR notebook.
"""
num_inducing = len(self.feature)
num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)
output_dim = tf.cast(tf.shape(self.Y)[1], settings.float_type)
err = self.Y - self.mean_function(self.X)
Kdiag = self.kern.Kdiag(self.X)
Kuf = self.feature.Kuf(self.kern, self.X)
Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
L = tf.cholesky(Kuu)
sigma = tf.sqrt(self.likelihood.variance)
# Compute intermediate matrices
A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
AAT = tf.matmul(A, A, transpose_b=True)
B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
LB = tf.cholesky(B)
Aerr = tf.matmul(A, err)
c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
# compute log marginal bound
bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
bound += tf.negative(output_dim) * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
bound -= 0.5 * num_data * output_dim * tf.log(self.likelihood.variance)
bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
bound += 0.5 * tf.reduce_sum(tf.square(c))
bound += -0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance
bound += 0.5 * output_dim * tf.reduce_sum(tf.matrix_diag_part(AAT))
return bound
def _build_predict(self, Xnew, full_cov=False):
"""
Compute the mean and variance of the latent function at some new points
Xnew. For a derivation of the terms in here, see the associated SGPR
notebook.
"""
num_inducing = len(self.feature)
err = self.Y - self.mean_function(self.X)
Kuf = self.feature.Kuf(self.kern, self.X)
Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
Kus = self.feature.Kuf(self.kern, Xnew)
sigma = tf.sqrt(self.likelihood.variance)
L = tf.cholesky(Kuu)
A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
LB = tf.cholesky(B)
Aerr = tf.matmul(A, err)
c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
mean = tf.matmul(tmp2, c, transpose_a=True)
if full_cov:
var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
- tf.matmul(tmp1, tmp1, transpose_a=True)
shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 2), shape)
else:
var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
- tf.reduce_sum(tf.square(tmp1), 0)
shape = tf.stack([1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 1), shape)
return mean + self.mean_function(Xnew), var
def _build_predict(self, Xnew, full_cov=False):
"""
Compute the mean and variance of the latent function at some new points.
Note that this is very similar to the SGPR prediction, for which
there are notes in the SGPR notebook.
:param Xnew: Point to predict at.
"""
num_inducing = tf.shape(self.Z)[0]
psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
Kuu = self.kern.K(self.Z) + tf.eye(num_inducing, dtype=settings.float_type) * settings.numerics.jitter_level
Kus = self.kern.K(self.Z, Xnew)
sigma2 = self.likelihood.variance
sigma = tf.sqrt(sigma2)
L = tf.cholesky(Kuu)
A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
LB = tf.cholesky(B)
c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma
tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
mean = tf.matmul(tmp2, c, transpose_a=True)
if full_cov:
var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
- tf.matmul(tmp1, tmp1, transpose_a=True)
shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 2), shape)
else:
var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
- tf.reduce_sum(tf.square(tmp1), 0)
shape = tf.stack([1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 1), shape)
return mean + self.mean_function(Xnew), var
def Kuu(self, kern, jitter=0.0):
Kzz = kern.K(self.Z)
Kzz += jitter * tf.eye(len(self), dtype=settings.dtypes.float_type)
return Kzz
def Kuu(self, kern, jitter=0.0):
if isinstance(kern, kernels.RBF):
with decors.params_as_tensors_for(kern):
Zmu, Zlen = kern._slice(self.Z, self.scales)
idlengthscales2 = tf.square(kern.lengthscales + Zlen)
sc = tf.sqrt(
tf.expand_dims(idlengthscales2, 0) + tf.expand_dims(idlengthscales2, 1) - tf.square(
kern.lengthscales))
d = self._cust_square_dist(Zmu, Zmu, sc)
Kzz = kern.variance * tf.exp(-d / 2) * tf.reduce_prod(kern.lengthscales / sc, 2)
Kzz += jitter * tf.eye(len(self), dtype=settings.float_type)
return Kzz
else:
raise NotImplementedError(
"Multiscale features not implemented for `%s`." % str(type(kern)))
def get_dictionary(self, session):
"""Fetch (approximately) the learned code dictionary.
Args:
session: TensorFlow session to use.
Returns:
The code dictionary, with shape (hidden_units, input_dim).
"""
fake_input = 1e15 * tf.eye(self.hidden_units)
return session.run(self._decode_layer(fake_input, reuse=True))
def apply(self, is_train, x, x_mask=None):
x_word_dim = tf.shape(x)[1]
# (batch, x_word, key_word)
dist_matrix = self.attention.get_scores(x, x)
dist_matrix += tf.expand_dims(tf.eye(x_word_dim) * VERY_NEGATIVE_NUMBER, 0) # Mask out self
joint_mask = compute_attention_mask(x_mask, x_mask, x_word_dim, x_word_dim)
if joint_mask is not None:
dist_matrix += VERY_NEGATIVE_NUMBER * (1 - tf.cast(joint_mask, dist_matrix.dtype))
if not self.alignment_bias:
select_probs = tf.nn.softmax(dist_matrix)
else:
# Allow zero-attention by adding a learned bias to the normalizer
bias = tf.exp(tf.get_variable("no-alignment-bias", initializer=tf.constant(-1.0, dtype=tf.float32)))
dist_matrix = tf.exp(dist_matrix)
select_probs = dist_matrix / (tf.reduce_sum(dist_matrix, axis=2, keep_dims=True) + bias)
response = tf.matmul(select_probs, x) # (batch, x_words, q_dim)
if self.merge is not None:
with tf.variable_scope("merge"):
response = self.merge.apply(is_train, response, x)
return response
else:
return response
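# A tiny NumPy illustration (independent of the class above) of the diagonal mask:
# adding a very negative number times the identity before the softmax drives each
# position's attention to itself to (numerically) zero.
import numpy as np

VERY_NEGATIVE_NUMBER = -1e30
scores = np.random.RandomState(0).randn(5, 5)
masked = scores + np.eye(5) * VERY_NEGATIVE_NUMBER
probs = np.exp(masked - masked.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)
assert np.allclose(np.diag(probs), 0.0)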
def convolve(image, pixel_filter, channels=3, name=None):
"""Perform a 2D pixel convolution on the given image.
Arguments:
image: A 3D `float32` `Tensor` of shape `[height, width, channels]`,
where `channels` is the third argument to this function and the
first two dimensions are arbitrary.
pixel_filter: A 2D `Tensor`, representing pixel weightings for the
kernel. This will be used to create a 4D kernel---the extra two
dimensions are for channels (see `tf.nn.conv2d` documentation),
and the kernel will be constructed so that the channels are
independent: each channel only observes the data from neighboring
pixels of the same channel.
channels: An integer representing the number of channels in the
image (e.g., 3 for RGB).
Returns:
A 3D `float32` `Tensor` of the same shape as the input.
"""
with tf.name_scope(name, 'convolve'):
tf.assert_type(image, tf.float32)
channel_filter = tf.eye(channels)
filter_ = (tf.expand_dims(tf.expand_dims(pixel_filter, -1), -1) *
tf.expand_dims(tf.expand_dims(channel_filter, 0), 0))
result_batch = tf.nn.conv2d(tf.stack([image]), # batch
filter=filter_,
strides=[1, 1, 1, 1],
padding='SAME')
return result_batch[0] # unbatch
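# A hedged usage sketch (illustrative shapes, TF 1.x graph mode assumed): a 3x3 box
# blur built with the helper above. The per-channel identity from tf.eye(channels)
# keeps the colour channels independent of one another.
blur_filter = tf.ones([3, 3]) / 9.0                        # 2-D pixel weights
image = tf.random_uniform([32, 32, 3], dtype=tf.float32)   # [height, width, channels]
blurred = convolve(image, blur_filter, channels=3)         # same shape as `image`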
def sim_multitask_GP(times,length,noise_vars,K_f,trainfrac):
"""
    Draw from a multitask GP.
    We continue to assume for now that the dim of the input space is 1, i.e. just time.
    M: number of tasks (labs/vitals/time series)
    trainfrac: proportion of the full M x N data matrix Y to include
"""
M = np.shape(K_f)[0]
N = len(times)
n = N*M
K_t = OU_kernel_np(length,times) #just a correlation function
Sigma = np.diag(noise_vars)
K = np.kron(K_f,K_t) + np.kron(Sigma,np.eye(N)) + 1e-6*np.eye(n)
L_K = np.linalg.cholesky(K)
y = np.dot(L_K,np.random.normal(0,1,n)) #Draw normal
#get indices of which time series and which time point, for each element in y
ind_kf = np.tile(np.arange(M),(N,1)).flatten('F') #vec by column
ind_kx = np.tile(np.arange(N),(M,1)).flatten()
#randomly dropout some fraction of fully observed time series
perm = np.random.permutation(n)
n_train = int(trainfrac*n)
train_inds = perm[:n_train]
y_ = y[train_inds]
ind_kf_ = ind_kf[train_inds]
ind_kx_ = ind_kx[train_inds]
return y_,ind_kf_,ind_kx_
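# OU_kernel_np is not shown above. A minimal sketch consistent with how it is used
# here (a stationary correlation function over the time points) is the
# Ornstein-Uhlenbeck / exponential kernel; treat this definition as an assumption.
import numpy as np

def OU_kernel_np(length, times):
    dists = np.abs(times[:, None] - times[None, :])
    return np.exp(-dists / length)   # N x N correlation matrix, ones on the diagonal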
def testCompareProjectSumAndProject(self):
# Compare results of project_sum and project.
tens = initializers.random_tensor_batch((2, 3, 4), 3, batch_size=4)
tangent_tens = initializers.random_tensor((2, 3, 4), 4)
project_sum = riemannian.project_sum(tens, tangent_tens, tf.eye(4))
project = riemannian.project(tens, tangent_tens)
with self.test_session() as sess:
res = sess.run((ops.full(project_sum), ops.full(project)))
project_sum_val, project_val = res
self.assertAllClose(project_sum_val, project_val)
def __call__(self, placeholder=None, moving_params=None):
""""""
embeddings = super(PretrainedVocab, self).__call__(placeholder, moving_params=moving_params)
# (n x b x d') -> (n x b x d)
with tf.variable_scope(self.name.title()):
matrix = linalg.linear(embeddings, self.token_embed_size, moving_params=moving_params)
if moving_params is None:
with tf.variable_scope('Linear', reuse=True):
weights = tf.get_variable('Weights')
tf.losses.add_loss(tf.nn.l2_loss(tf.matmul(tf.transpose(weights), weights) - tf.eye(self.token_embed_size)))
return matrix
#return embeddings # changed in saves2/test8
#=============================================================
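# A short standalone NumPy sketch (illustrative) of the added loss above:
# tf.nn.l2_loss(W^T W - I) penalises deviation of the projection's columns from an
# orthonormal set, and vanishes when the columns are exactly orthonormal.
import numpy as np

rng = np.random.RandomState(0)
Q, _ = np.linalg.qr(rng.randn(8, 4))                  # 8 x 4 with orthonormal columns
penalty = 0.5 * np.sum((Q.T @ Q - np.eye(4)) ** 2)    # matches tf.nn.l2_loss semantics
assert np.isclose(penalty, 0.0)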
def linear_combine(clen, pclen, idx):
Wl = param.get('Wl')
Wr = param.get('Wr')
dim = tf.unstack(tf.shape(Wl))[0]
batch_shape = tf.shape(clen)
f = (clen / pclen)
l = (pclen - idx - 1) / (pclen - 1)
r = (idx) / (pclen - 1)
# when pclen == 1, replace nan items with 0.5
l = tf.where(tf.is_nan(l), tf.ones_like(l) * 0.5, l)
r = tf.where(tf.is_nan(r), tf.ones_like(r) * 0.5, r)
lb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * l)
rb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * r)
fb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * f)
lb = tf.reshape(lb, [-1, hyper.word_dim])
rb = tf.reshape(rb, [-1, hyper.word_dim])
tmp = tf.matmul(lb, Wl) + tf.matmul(rb, Wr)
tmp = tf.reshape(tmp, [-1, hyper.word_dim, hyper.word_dim])
return tf.matmul(fb, tmp)
def tri_combined(idx, pclen, depth, max_depth):
"""TF function, input: idx, pclen, depth, max_depth as batch (1D Tensor)
Output: weight tensor (3D Tensor), first dim is batch
"""
Wconvt = param.get('Wconvt')
Wconvl = param.get('Wconvl')
Wconvr = param.get('Wconvr')
dim = tf.unstack(tf.shape(Wconvt))[0]
batch_shape = tf.shape(idx)
tmp = (idx - 1) / (pclen - 1)
# when pclen == 1, replace nan items with 0.5
tmp = tf.where(tf.is_nan(tmp), tf.ones_like(tmp) * 0.5, tmp)
t = (max_depth - depth) / max_depth
r = (1 - t) * tmp
l = (1 - t) * (1 - r)
lb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * l)
rb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * r)
tb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * t)
lb = tf.reshape(lb, [-1, dim])
rb = tf.reshape(rb, [-1, dim])
tb = tf.reshape(tb, [-1, dim])
tmp = tf.matmul(lb, Wconvl) + tf.matmul(rb, Wconvr) + tf.matmul(tb, Wconvt)
tmp = tf.reshape(tmp, [-1, hyper.word_dim, hyper.conv_dim])
return tmp
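# A tiny NumPy note (illustrative) on the transpose/eye pattern used above:
# tf.transpose(tf.transpose(tf.eye(dim, batch_shape=[B])) * s) is just a batch of
# identity matrices, each scaled by that example's scalar s[b].
import numpy as np

B, dim = 3, 5
s = np.random.RandomState(0).rand(B)
eye_batch = np.tile(np.eye(dim), (B, 1, 1))                  # [B, dim, dim]
via_transpose = np.transpose(np.transpose(eye_batch) * s)    # broadcast over last axis
assert np.allclose(via_transpose, s[:, None, None] * eye_batch)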
def __init__(self, n_labeled, n_unlabeled, n_classes):
self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])
w_init = tf.random_uniform(shape=[], minval=0.5, maxval=5)
self._w = w = tf.get_variable("w", dtype=tf.float32, initializer=w_init)
b_init = tf.random_uniform(shape=[], minval=-1, maxval=1)
self._b = b = tf.get_variable("b", dtype=tf.float32, initializer=b_init)
tuu = tf.sigmoid(w * t_uu + b)
tul = tf.sigmoid(w * t_ul + b)
# tuu = tf.Print(tuu, [tuu], 'tuu', summarize=30)
# tul = tf.Print(tul, [tul], 'tul', summarize=30)
# column normalization
tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
tul_col_norms = tf.norm(tul, ord=1, axis=0)
tuu /= tuu_col_norms
tul /= tul_col_norms
# row normalization
tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
tul_row_norms = tf.norm(tul, ord=1, axis=1)
tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])
I = tf.eye(n_unlabeled, dtype=tf.float32)
inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
y_u = tf.matmul(tf.matmul(inv, tul), y_l)
y = tf.concat([y_u, y_l], 0)
self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
self._entropy = entropy = - tf.reduce_sum(y * tf.log(y))
self._train_op = tf.train.AdamOptimizer(0.005).minimize(entropy)
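# A NumPy sketch (illustrative shapes and names) of the closed-form label-propagation
# step above: the unlabelled labels are the fixed point of
#   Y_u = T_uu Y_u + T_ul Y_l,  i.e.  Y_u = (I - T_uu)^-1 T_ul Y_l,
# which the graph solves (with a small ridge term) via tf.matrix_solve_ls.
import numpy as np

rng = np.random.RandomState(0)
n_u, n_l, n_c = 6, 4, 3
T = rng.rand(n_u, n_u + n_l)
T /= T.sum(axis=1, keepdims=True)               # row-normalised transition matrix
T_uu, T_ul = T[:, :n_u], T[:, n_u:]
Y_l = np.eye(n_c)[rng.randint(n_c, size=n_l)]   # one-hot labels for the labelled points
Y_u = np.linalg.solve(np.eye(n_u) - T_uu, T_ul @ Y_l)
assert np.allclose(Y_u, T_uu @ Y_u + T_ul @ Y_l)   # fixed-point check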
def __init__(self, n_labeled, n_unlabeled, n_classes):
self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])
w_init = tf.random_uniform(shape=[], minval=0.5, maxval=5)
self._w = w = tf.get_variable("w", dtype=tf.float32, initializer=w_init)
b_init = tf.random_uniform(shape=[], minval=-1, maxval=1)
self._b = b = tf.get_variable("b", dtype=tf.float32, initializer=b_init)
tuu = tf.sigmoid(w * t_uu + b)
tul = tf.sigmoid(w * t_ul + b)
# tuu = tf.Print(tuu, [tuu], 'tuu', summarize=30)
# tul = tf.Print(tul, [tul], 'tul', summarize=30)
# column normalization
tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
tul_col_norms = tf.norm(tul, ord=1, axis=0)
tuu /= tuu_col_norms
tul /= tul_col_norms
# row normalization
tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
tul_row_norms = tf.norm(tul, ord=1, axis=1)
tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])
I = tf.eye(n_unlabeled, dtype=tf.float32)
inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
y_u = tf.matmul(tf.matmul(inv, tul), y_l)
y = tf.concat([y_u, y_l], 0)
self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
self._entropy = entropy = - tf.reduce_sum(y * tf.log(y))
self._train_op = tf.train.AdamOptimizer(0.1).minimize(entropy)