def __init__(self, n_labeled, n_unlabeled, n_classes):
self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])
self._w = w = tf.placeholder(tf.float32, shape=[])
self._b = b = tf.placeholder(tf.float32, shape=[])
tuu = tf.sigmoid(w * t_uu + b)
tul = tf.sigmoid(w * t_ul + b)
# column normalization
tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
tul_col_norms = tf.norm(tul, ord=1, axis=0)
tuu /= tuu_col_norms
tul /= tul_col_norms
# row normalization
tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
tul_row_norms = tf.norm(tul, ord=1, axis=1)
tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])
I = tf.eye(n_unlabeled, dtype=tf.float32)
inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
y_u = tf.matmul(tf.matmul(inv, tul), y_l)
y = tf.concat([y_u, y_l], 0)
self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
python类eye()的实例源码
def __init__(self, n_labeled, n_unlabeled, input_dims, n_classes):
self._t_uu = tuu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
self._t_ul = tul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])
tuu = tf.sigmoid(t_uu)
tul = tf.sigmoid(t_ul)
# column normalization
tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
tul_col_norms = tf.norm(tul, ord=1, axis=0)
tuu /= tuu_col_norms
tul /= tul_col_norms
# row normalization
tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
tul_row_norms = tf.norm(tul, ord=1, axis=1)
tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])
I = tf.eye(n_unlabeled, dtype=tf.float32)
inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
y_u = tf.matmul(tf.matmul(inv, tul), y_l)
y = tf.concat([y_u, y_l], 0)
self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
def __init__(self, n_labeled, n_unlabeled, n_classes):
self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])
tuu = tf.sigmoid(t_uu)
tul = tf.sigmoid(t_ul)
# column normalization
tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
tul_col_norms = tf.norm(tul, ord=1, axis=0)
tuu /= tuu_col_norms
tul /= tul_col_norms
# row normalization
tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
tul_row_norms = tf.norm(tul, ord=1, axis=1)
tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])
I = tf.eye(n_unlabeled, dtype=tf.float32)
inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
y_u = tf.matmul(tf.matmul(inv, tul), y_l)
y = tf.concat([y_u, y_l], 0)
self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
def __init__(self, n_labeled, n_unlabeled, n_classes):
self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])
self._w = w = tf.placeholder(tf.float32, shape=[])
self._b = b = tf.placeholder(tf.float32, shape=[])
tuu = tf.sigmoid(w * t_uu + b)
tul = tf.sigmoid(w * t_ul + b)
# column normalization
tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
tul_col_norms = tf.norm(tul, ord=1, axis=0)
tuu /= tuu_col_norms
tul /= tul_col_norms
# row normalization
tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
tul_row_norms = tf.norm(tul, ord=1, axis=1)
tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])
I = tf.eye(n_unlabeled, dtype=tf.float32)
inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
y_u = tf.matmul(tf.matmul(inv, tul), y_l)
y = tf.concat([y_u, y_l], 0)
self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
def add_minibatch_features(image,df_dim):
shape = image.get_shape().as_list()
dim = np.prod(shape[1:]) # dim = prod(9,2) = 18
h_mb0 = lrelu(conv2d(image, df_dim, name='d_mb0_conv'))
h_mb1 = conv2d(h_mb0, df_dim, name='d_mbh1_conv')
dims=h_mb1.get_shape().as_list()
conv_dims=np.prod(dims[1:])
image_ = tf.reshape(h_mb1, tf.stack([-1, conv_dims]))
#image_ = tf.reshape(h_mb1, tf.stack([batch_size, -1]))
n_kernels = 300
dim_per_kernel = 50
x = linear(image_, n_kernels * dim_per_kernel,'d_mbLinear')
act = tf.reshape(x, (-1, n_kernels, dim_per_kernel))
act= tf.reshape(x, (-1, n_kernels, dim_per_kernel))
act_tp=tf.transpose(act, [1,2,0])
#bs x n_ker x dim_ker x bs -> bs x n_ker x bs :
abs_dif = tf.reduce_sum(tf.abs(tf.expand_dims(act, 3) - tf.expand_dims(act_tp, 0)), 2)
eye=tf.expand_dims( tf.eye( tf.shape(abs_dif)[0] ), 1)#bs x 1 x bs
masked=tf.exp(-abs_dif) - eye
f1=tf.reduce_mean( masked, 2)
mb_features = tf.reshape(f1, [-1, 1, 1, n_kernels])
return conv_cond_concat(image, mb_features)
## following is from https://github.com/openai/improved-gan/blob/master/imagenet/discriminator.py#L88
#def add_minibatch_features(image,df_dim,batch_size):
# shape = image.get_shape().as_list()
# dim = np.prod(shape[1:]) # dim = prod(9,2) = 18
# h_mb0 = lrelu(conv2d(image, df_dim, name='d_mb0_conv'))
# h_mb1 = conv2d(h_mb0, df_dim, name='d_mbh1_conv')
#
# dims=h_mb1.get_shape().as_list()
# conv_dims=np.prod(dims[1:])
#
# image_ = tf.reshape(h_mb1, tf.stack([-1, conv_dims]))
# #image_ = tf.reshape(h_mb1, tf.stack([batch_size, -1]))
#
# n_kernels = 300
# dim_per_kernel = 50
# x = linear(image_, n_kernels * dim_per_kernel,'d_mbLinear')
# activation = tf.reshape(x, (batch_size, n_kernels, dim_per_kernel))
# big = np.zeros((batch_size, batch_size), dtype='float32')
# big += np.eye(batch_size)
# big = tf.expand_dims(big, 1)
# abs_dif = tf.reduce_sum(tf.abs(tf.expand_dims(activation, 3) - tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0)), 2)
# mask = 1. - big
# masked = tf.exp(-abs_dif) * mask
# f1 = tf.reduce_sum(masked, 2) / tf.reduce_sum(mask)
# mb_features = tf.reshape(f1, [batch_size, 1, 1, n_kernels])
# return conv_cond_concat(image, mb_features)
def interatomic_distances(positions, cell, pbc, cutoff):
with tf.variable_scope('distance'):
# calculate heights
# account for zero cell in case of no pbc
c = tf.reduce_sum(tf.cast(pbc, tf.int32)) > 0
icell = tf.cond(c, lambda: tf.matrix_inverse(cell),
lambda: tf.eye(3))
height = 1. / tf.sqrt(tf.reduce_sum(tf.square(icell), 0))
extent = tf.where(tf.cast(pbc, tf.bool),
tf.cast(tf.floor(cutoff / height), tf.int32),
tf.cast(tf.zeros_like(height), tf.int32))
n_reps = tf.reduce_prod(2 * extent + 1)
# replicate atoms
r = tf.range(-extent[0], extent[0] + 1)
v0 = tf.expand_dims(r, 1)
v0 = tf.tile(v0,
tf.stack(((2 * extent[1] + 1) * (2 * extent[2] + 1), 1)))
v0 = tf.reshape(v0, tf.stack((n_reps, 1)))
r = tf.range(-extent[1], extent[1] + 1)
v1 = tf.expand_dims(r, 1)
v1 = tf.tile(v1, tf.stack((2 * extent[2] + 1, 2 * extent[0] + 1)))
v1 = tf.reshape(v1, tf.stack((n_reps, 1)))
v2 = tf.expand_dims(tf.range(-extent[2], extent[2] + 1), 1)
v2 = tf.tile(v2,
tf.stack((1, (2 * extent[0] + 1) * (2 * extent[1] + 1))))
v2 = tf.reshape(v2, tf.stack((n_reps, 1)))
v = tf.cast(tf.concat((v0, v1, v2), axis=1), tf.float32)
offset = tf.matmul(v, cell)
offset = tf.expand_dims(offset, 0)
# add axes
positions = tf.expand_dims(positions, 1)
rpos = positions + offset
rpos = tf.expand_dims(rpos, 0)
positions = tf.expand_dims(positions, 1)
euclid_dist = tf.sqrt(
tf.reduce_sum(tf.square(positions - rpos),
reduction_indices=3))
return euclid_dist
def get_weight(self, name, shape,
init='glorot',
device='gpu',
weight_val=None,
trainable=True):
"""Creates a new weight.
Args:
name: str, the name of the variable.
shape: tuple of ints, the shape of the variable.
init: str, the type of initialize to use.
device: str, 'cpu' or 'gpu'.
weight_val: Numpy array to use as the initial weights.
trainable: bool, whether or not this weight is trainable.
Returns:
a trainable TF variable with shape `shape`.
"""
if weight_val is None:
init = init.lower()
if init == 'normal':
initializer = (lambda shape, dtype, partition_info:
tf.random_normal(shape, stddev=0.05))
elif init == 'uniform':
initializer = (lambda shape, dtype, partition_info:
tf.random_uniform(shape, stddev=0.05))
elif init == 'glorot':
initializer = (lambda shape, dtype, partition_info:
tf.random_normal(
shape, stddev=np.sqrt(6. / sum(shape))))
elif init == 'eye':
assert all(i == shape[0] for i in shape)
initializer = (lambda shape, dtype, partition_info:
tf.eye(shape[0]))
elif init == 'zero':
initializer = (lambda shape, dtype, partition_info:
tf.zeros(shape))
else:
raise ValueError('Invalid init: "%s"' % init)
else:
weight_val = weight_val.astype('float32')
device = device.lower()
if device == 'gpu':
on_gpu = True
elif device == 'cpu':
on_gpu = False
else:
raise ValueError('Invalid device: "%s"' % device)
if self._only_cpu:
on_gpu = False
with tf.device('/gpu:0' if on_gpu else '/cpu:0'):
weight = tf.get_variable(name=name,
shape=shape,
initializer=initializer,
trainable=trainable)
self._weights.append(weight)
return weight
def discriminator_lks_test(self, opts, input_):
"""Deterministic discriminator using Kernel Stein Discrepancy test
refer to the quadratic test of https://arxiv.org/pdf/1705.07673.pdf
The statistic basically reads:
\[
\frac{1}{n^2 - n}\sum_{i \neq j} \left(
frac{<x_i, x__j>}{\sigma_p^4}
+ d/\sigma_k^2
- \|x_i - x_j\|^2\left(\frac{1}{\sigma_p^2\sigma_k^2} + \frac{1}{\sigma_k^4}\right)
\right)
\exp( - \|x_i - x_j\|^2/2/\sigma_k^2)
\]
"""
n = self.get_batch_size(opts, input_)
n = tf.cast(n, tf.int32)
half_size = (n * n - n) / 2
nf = tf.cast(n, tf.float32)
norms = tf.reduce_sum(tf.square(input_), axis=1, keep_dims=True)
dotprods = tf.matmul(input_, input_, transpose_b=True)
distances = norms + tf.transpose(norms) - 2. * dotprods
sigma2_p = opts['pot_pz_std'] ** 2 # var = std ** 2
# Median heuristic for the sigma^2 of Gaussian kernel
# sigma2_k = tf.nn.top_k(tf.reshape(distances, [-1]), half_size).values[half_size - 1]
# Maximal heuristic for the sigma^2 of Gaussian kernel
# sigma2_k = tf.nn.top_k(tf.reshape(distances, [-1]), 1).values[0]
sigma2_k = opts['latent_space_dim'] * sigma2_p
if opts['verbose'] == 2:
sigma2_k = tf.Print(sigma2_k, [tf.nn.top_k(tf.reshape(distances, [-1]), 1).values[0]],
'Maximal squared pairwise distance:')
sigma2_k = tf.Print(sigma2_k, [tf.reduce_mean(distances)],
'Average squared pairwise distance:')
sigma2_k = tf.Print(sigma2_k, [sigma2_k], 'Kernel width:')
res = dotprods / sigma2_p ** 2 \
- distances * (1. / sigma2_p / sigma2_k + 1. / sigma2_k ** 2) \
+ opts['latent_space_dim'] / sigma2_k
res = tf.multiply(res, tf.exp(- distances / 2./ sigma2_k))
res = tf.multiply(res, 1. - tf.eye(n))
stat = tf.reduce_sum(res) / (nf * nf - nf)
# stat = tf.reduce_sum(res) / (nf * nf)
return stat
def add_least_gaussian2d_ops(self, opts):
""" Add ops searching for the 2d plane in z_dim hidden space
corresponding to the 'least Gaussian' look of the sample
"""
with tf.variable_scope('leastGaussian2d'):
# Projection matrix which we are going to tune
sample_ph = tf.placeholder(
tf.float32, [None, opts['latent_space_dim']],
name='sample_ph')
v = tf.get_variable(
"proj_v", [opts['latent_space_dim'], 1],
tf.float32, tf.random_normal_initializer(stddev=1.))
u = tf.get_variable(
"proj_u", [opts['latent_space_dim'], 1],
tf.float32, tf.random_normal_initializer(stddev=1.))
npoints = tf.cast(tf.shape(sample_ph)[0], tf.int32)
# First we need to make sure projection matrix is orthogonal
v_norm = tf.nn.l2_normalize(v, 0)
dotprod = tf.reduce_sum(tf.multiply(u, v_norm))
u_ort = u - dotprod * v_norm
u_norm = tf.nn.l2_normalize(u_ort, 0)
Mproj = tf.concat([v_norm, u_norm], 1)
sample_proj = tf.matmul(sample_ph, Mproj)
a = tf.eye(npoints) - tf.ones([npoints, npoints]) / tf.cast(npoints, tf.float32)
b = tf.matmul(sample_proj, tf.matmul(a, a), transpose_a=True)
b = tf.matmul(b, sample_proj)
# Sample covariance matrix
covhat = b / (tf.cast(npoints, tf.float32) - 1)
# covhat = tf.Print(covhat, [covhat], 'Cov:')
with tf.variable_scope('leastGaussian2d'):
gcov = opts['pot_pz_std'] * opts['pot_pz_std'] * tf.eye(2)
# l2 distance between sample cov and the Gaussian cov
projloss = tf.reduce_sum(tf.square(covhat - gcov))
# Also account for the first moment, i.e. expected value
projloss += tf.reduce_sum(tf.square(tf.reduce_mean(sample_proj, 0)))
# We are maximizing
projloss = -projloss
optim = tf.train.AdamOptimizer(0.001, 0.9)
optim = optim.minimize(projloss, var_list=[v, u])
self._proj_u = u_norm
self._proj_v = v_norm
self._proj_sample_ph = sample_ph
self._proj_covhat = covhat
self._proj_loss = projloss
self._proj_optim = optim
def _build_likelihood(self):
"""
Construct a tensorflow function to compute the bound on the marginal
likelihood.
"""
num_inducing = tf.shape(self.Z)[0]
psi0 = tf.reduce_sum(self.kern.eKdiag(self.X_mean, self.X_var), 0)
psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
Kuu = self.kern.K(self.Z) + tf.eye(num_inducing, dtype=settings.float_type) * settings.numerics.jitter_level
L = tf.cholesky(Kuu)
sigma2 = self.likelihood.variance
sigma = tf.sqrt(sigma2)
# Compute intermediate matrices
A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
LB = tf.cholesky(B)
log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma
# KL[q(x) || p(x)]
dX_var = self.X_var if len(self.X_var.get_shape()) == 2 else tf.matrix_diag_part(self.X_var)
NQ = tf.cast(tf.size(self.X_mean), settings.float_type)
D = tf.cast(tf.shape(self.Y)[1], settings.float_type)
KL = -0.5 * tf.reduce_sum(tf.log(dX_var)) \
+ 0.5 * tf.reduce_sum(tf.log(self.X_prior_var)) \
- 0.5 * NQ \
+ 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + dX_var) / self.X_prior_var)
# compute log marginal bound
ND = tf.cast(tf.size(self.Y), settings.float_type)
bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
bound += -0.5 * D * log_det_B
bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
bound += 0.5 * tf.reduce_sum(tf.square(c))
bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
tf.reduce_sum(tf.matrix_diag_part(AAT)))
bound -= KL
return bound
def conditional(Xnew, X, kern, f, *, full_cov=False, q_sqrt=None, white=False):
"""
Given f, representing the GP at the points X, produce the mean and
(co-)variance of the GP at the points Xnew.
Additionally, there may be Gaussian uncertainty about f as represented by
q_sqrt. In this case `f` represents the mean of the distribution and
q_sqrt the square-root of the covariance.
Additionally, the GP may have been centered (whitened) so that
p(v) = N(0, I)
f = L v
thus
p(f) = N(0, LL^T) = N(0, K).
In this case `f` represents the values taken by v.
The method can either return the diagonals of the covariance matrix for
each output (default) or the full covariance matrix (full_cov=True).
We assume K independent GPs, represented by the columns of f (and the
last dimension of q_sqrt).
:param Xnew: data matrix, size N x D.
:param X: data points, size M x D.
:param kern: GPflow kernel.
:param f: data matrix, M x K, representing the function values at X,
for K functions.
:param q_sqrt: matrix of standard-deviations or Cholesky matrices,
size M x K or M x M x K.
:param white: boolean of whether to use the whitened representation as
described above.
:return: two element tuple with conditional mean and variance.
"""
num_data = tf.shape(X)[0] # M
Kmm = kern.K(X) + tf.eye(num_data, dtype=settings.float_type) * settings.numerics.jitter_level
Kmn = kern.K(X, Xnew)
if full_cov:
Knn = kern.K(Xnew)
else:
Knn = kern.Kdiag(Xnew)
return base_conditional(Kmn, Kmm, Knn, f, full_cov=full_cov, q_sqrt=q_sqrt, white=white)
def training_decoding_layer(
target_data,
target_lengths,
enc_output,
enc_output_lengths,
fst,
keep_prob):
''' Training decoding layer for the model.
Returns:
Training logits
'''
target_data = tf.concat(
[tf.fill([FLAGS.batch_size, 1], VOCAB_TO_INT['<s>']),
target_data[:, :-1]], 1)
dec_cell = get_dec_cell(
enc_output,
enc_output_lengths,
FLAGS.use_train_lm,
fst,
1,
keep_prob)
initial_state = dec_cell.zero_state(
dtype=tf.float32,
batch_size=FLAGS.batch_size)
target_data = tf.nn.embedding_lookup(
tf.eye(VOCAB_SIZE),
target_data)
training_helper = tf.contrib.seq2seq.TrainingHelper(
inputs=target_data,
sequence_length=target_lengths,
time_major=False)
training_decoder = tf.contrib.seq2seq.BasicDecoder(
dec_cell,
training_helper,
initial_state)
training_logits, _, _ = tf.contrib.seq2seq.dynamic_decode(
training_decoder,
output_time_major=False,
impute_finished=True)
return training_logits
def inference_decoding_layer(
enc_output,
enc_output_lengths,
fst,
keep_prob):
''' Inference decoding layer for the model.
Returns:
Predictions
'''
dec_cell = get_dec_cell(
enc_output,
enc_output_lengths,
FLAGS.use_inference_lm,
fst,
FLAGS.beam_width,
keep_prob)
initial_state = dec_cell.zero_state(
dtype=tf.float32,
batch_size=FLAGS.batch_size * FLAGS.beam_width)
start_tokens = tf.fill(
[FLAGS.batch_size],
VOCAB_TO_INT['<s>'],
name='start_tokens')
inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
dec_cell,
tf.eye(VOCAB_SIZE),
start_tokens,
VOCAB_TO_INT['</s>'],
initial_state,
FLAGS.beam_width)
predictions, _, _ = tf.contrib.seq2seq.dynamic_decode(
inference_decoder,
output_time_major=False,
maximum_iterations=FLAGS.max_output_len)
return predictions
def __init__(self, X, y, M=10, max_iter = 2000, N_batch = 1,
monitor_likelihood = 10, lrate = 1e-3):
(N,D) = X.shape
# kmeans on a subset of data
N_subset = min(N, 10000)
idx = np.random.choice(N, N_subset, replace=False)
kmeans = KMeans(n_clusters=M, random_state=0).fit(X[idx,:])
Z = kmeans.cluster_centers_
hyp = np.log(np.ones(D+1))
logsigma_n = np.array([-4.0])
hyp = np.concatenate([hyp, logsigma_n])
m = np.zeros((M,1))
S = kernel(Z,Z,hyp[:-1])
self.X = X
self.y = y
self.M = M
self.Z = tf.Variable(Z,dtype=tf.float64,trainable=False)
self.K_u_inv = tf.Variable(np.eye(M),dtype=tf.float64,trainable=False)
self.m = tf.Variable(m,dtype=tf.float64,trainable=False)
self.S = tf.Variable(S,dtype=tf.float64,trainable=False)
self.nlml = tf.Variable(0.0, dtype=tf.float64, trainable=False)
self.hyp = hyp
self.max_iter = max_iter
self.N_batch = N_batch
self.monitor_likelihood = monitor_likelihood
self.jitter = 1e-8
self.jitter_cov = 1e-8
self.lrate = lrate
self.optimizer = tf.train.AdamOptimizer(self.lrate)
# Tensor Flow Session
# self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
self.sess = tf.Session()
def likelihood(self, hyp, X_batch, y_batch, monitor=False):
M = self.M
Z = self.Z
m = self.m
S = self.S
jitter = self.jitter
jitter_cov = self.jitter_cov
N = tf.shape(X_batch)[0]
logsigma_n = hyp[-1]
sigma_n = tf.exp(logsigma_n)
# Compute K_u_inv
K_u = kernel_tf(Z, Z, hyp[:-1])
L = tf.cholesky(K_u + np.eye(M)*jitter_cov)
K_u_inv = tf.matrix_triangular_solve(tf.transpose(L), tf.matrix_triangular_solve(L, np.eye(M), lower=True), lower=False)
K_u_inv_op = self.K_u_inv.assign(K_u_inv)
# Compute mu
psi = kernel_tf(Z, X_batch, hyp[:-1])
K_u_inv_m = tf.matmul(K_u_inv, m)
MU = tf.matmul(tf.transpose(psi), K_u_inv_m)
# Compute cov
Alpha = tf.matmul(K_u_inv, psi)
COV = kernel_tf(X_batch, X_batch, hyp[:-1]) - tf.matmul(tf.transpose(psi), tf.matmul(K_u_inv,psi)) + \
tf.matmul(tf.transpose(Alpha), tf.matmul(S,Alpha))
# Compute COV_inv
LL = tf.cholesky(COV + tf.eye(N, dtype=tf.float64)*sigma_n + tf.eye(N, dtype=tf.float64)*jitter)
COV_inv = tf.matrix_triangular_solve(tf.transpose(LL), tf.matrix_triangular_solve(LL, tf.eye(N, dtype=tf.float64), lower=True), lower=False)
# Compute cov(Z, X)
cov_ZX = tf.matmul(S,Alpha)
# Update m and S
alpha = tf.matmul(COV_inv, tf.transpose(cov_ZX))
m_new = m + tf.matmul(cov_ZX, tf.matmul(COV_inv, y_batch-MU))
S_new = S - tf.matmul(cov_ZX, alpha)
if monitor == False:
m_op = self.m.assign(m_new)
S_op = self.S.assign(S_new)
# Compute NLML
K_u_inv_m = tf.matmul(K_u_inv, m_new)
NLML = 0.5*tf.matmul(tf.transpose(m_new), K_u_inv_m) + tf.reduce_sum(tf.log(tf.diag_part(L))) + 0.5*np.log(2.*np.pi)*tf.cast(M, tf.float64)
train = self.optimizer.minimize(NLML)
nlml_op = self.nlml.assign(NLML[0,0])
return tf.group(*[train, m_op, S_op, nlml_op, K_u_inv_op])
def block_Lanczos(Sigma_func,B_,n_mc_smps):
"""
block Lanczos method to approx Sigma^1/2 * B, with B matrix of N(0,1)'s.
Used to generate multiple approximate large normal draws.
"""
n = tf.shape(B_)[0] #dim of the multivariate normal
s = n_mc_smps #number of samples to draw
k = tf.div(n,500) + 3 #number of Lanczos iterations
betas = tf.zeros([1,s])
alphas = tf.zeros([0,s])
D = tf.zeros([s,n,1])
B_norms = tf.norm(B_,axis=0)
D = tf.concat([D,tf.expand_dims(tf.transpose(B_/B_norms),2)],2)
def cond(j,alphas,betas,D):
return j < k+1
#TODO: use block-CG in place of Sigma
def body(j,alphas,betas,D):
d_j = tf.squeeze(tf.slice(D,[0,0,j],[-1,-1,1]))
d = Sigma_func(tf.transpose(d_j)) - (tf.slice(betas,[j-1,0],[1,-1])*
tf.transpose(tf.squeeze(tf.slice(D,[0,0,j-1],[-1,-1,1]))))
alphas = tf.concat([alphas,[tf.diag_part(tf.matmul(d_j,d))]],0)
d = d - tf.slice(alphas,[j-1,0],[1,-1])*tf.transpose(d_j)
betas = tf.concat([betas,[tf.norm(d,axis=0)]],0)
D = tf.concat([D,tf.expand_dims(tf.transpose(d/tf.slice(betas,[j,0],[1,-1])),2)],2)
return j+1,alphas,betas,D
j = tf.constant(1)
j,alphas,betas,D = tf.while_loop(cond,body,loop_vars=[j,alphas,betas,D],
shape_invariants=[j.get_shape(),tf.TensorShape([None,None]),
tf.TensorShape([None,None]),tf.TensorShape([None,None,None])])
D_ = tf.slice(D,[0,0,1],[-1,-1,k])
##TODO: replace loop
H = tf.zeros([0,k,k])
for ss in range(s):
this_beta = tf.diag(tf.squeeze(tf.slice(betas,[1,ss],[k-1,1])))
#build out tridiagonal H: alphas_1:k on main, betas_2:k on off
this_H = (tf.diag(tf.squeeze(tf.slice(alphas,[0,ss],[-1,1]))) +
tf.pad(this_beta,[[1,0],[0,1]]) +
tf.pad(this_beta,[[0,1],[1,0]]))
H = tf.concat([H,tf.expand_dims(this_H,0)],0)
E,V = tf.self_adjoint_eig(H)
E_sqrt = tf.zeros([0,k,k])
#TODO: replace loop
for ss in range(s):
#ensure positive definite
E_sqrt = tf.concat([E_sqrt,tf.expand_dims(tf.diag(tf.squeeze(tf.sqrt(tf.maximum(tf.slice(E,[ss,0],[1,-1]),1e-6)))),0)],0)
sq_H = tf.matmul(V,tf.matmul(E_sqrt,tf.transpose(V,perm=[0,2,1])))
e1 = tf.expand_dims(tf.transpose(tf.tile(tf.slice(tf.eye(k),[0,0],[-1,1]),[1,s])),2)
out = B_norms*tf.transpose(tf.squeeze(tf.matmul(D_,tf.matmul(sq_H,e1))))
return out
def __init__(self, dim_z, dim_y, dim_u=0, dim_k=1, **kwargs):
self.dim_z = dim_z
self.dim_y = dim_y
self.dim_u = dim_u
self.dim_k = dim_k
# Initializer for identity matrix
self.eye_init = lambda shape, dtype=np.float32: np.eye(*shape, dtype=dtype)
# Pop all variables
init = kwargs.pop('mu', np.zeros((dim_z, ), dtype=np.float32))
self.mu = tf.get_variable('mu', initializer=init, trainable=False) # state
init = kwargs.pop('Sigma', self.eye_init((dim_z, dim_z))).astype(np.float32)
self.Sigma = tf.get_variable('Sigma', initializer=init, trainable=False) # uncertainty covariance
init = kwargs.pop('y_0', np.zeros((dim_y,))).astype(np.float32)
self.y_0 = tf.get_variable('y_0', initializer=init) # initial output
init = kwargs.pop('A', self.eye_init((dim_z, dim_z)))
self.A = tf.get_variable('A', initializer=init)
init = kwargs.pop('B', self.eye_init((dim_z, dim_u))).astype(np.float32)
self.B = tf.get_variable('B', initializer=init) # control transition matrix
init = kwargs.pop('Q', self.eye_init((dim_z, dim_z))).astype(np.float32)
self.Q = tf.get_variable('Q', initializer=init, trainable=False) # process uncertainty
init = kwargs.pop('C', self.eye_init((dim_y, dim_z))).astype(np.float32)
self.C = tf.get_variable('C', initializer=init) # Measurement function
init = kwargs.pop('R', self.eye_init((dim_y, dim_y))).astype(np.float32)
self.R = tf.get_variable('R', initializer=init, trainable=False) # state uncertainty
self._alpha_sq = tf.constant(1., dtype=tf.float32) # fading memory control
self.M = 0 # process-measurement cross correlation
# identity matrix
self._I = tf.constant(self.eye_init((dim_z, dim_z)), name='I')
# Get variables that are possibly defined with tensors
self.y = kwargs.pop('y', None)
if self.y is None:
self.y = tf.placeholder(tf.float32, shape=(None, None, dim_y), name='y')
self.u = kwargs.pop('u', None)
if self.u is None:
self.u = tf.placeholder(tf.float32, shape=(None, None, dim_u), name='u')
self.mask = kwargs.pop('mask', None)
if self.mask is None:
self.mask = tf.placeholder(tf.float32, shape=(None, None), name='mask')
self.alpha = kwargs.pop('alpha', None)
self.state = kwargs.pop('state', None)
self.log_likelihood = None