def infExact_scipy_post(self, K, covars, y, sig2e, fixedEffects):
n = y.shape[0]
#mean vector
m = covars.dot(fixedEffects)
if (K.shape[1] < K.shape[0]): K_true = K.dot(K.T)
else: K_true = K
if sig2e<1e-6:
L = la.cholesky(K_true + sig2e*np.eye(n), overwrite_a=True, check_finite=False) #Cholesky factor of covariance with noise
sl = 1
pL = -self.solveChol(L, np.eye(n)) #L = -inv(K+inv(sW^2))
else:
L = la.cholesky(K_true/sig2e + np.eye(n), overwrite_a=True, check_finite=False) #Cholesky factor of B
sl = sig2e
pL = L #L = chol(eye(n)+sW*sW'.*K)
alpha = self.solveChol(L, y-m, overwrite_b=False) / sl
post = dict([])
post['alpha'] = alpha #return the posterior parameters
post['sW'] = np.ones(n) / np.sqrt(sig2e) #sqrt of noise precision vector
post['L'] = pL
return post
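For context, a rough sketch of how such a GPML-style posterior is typically consumed for the predictive mean (illustrative only; `model`, `Ks`, and `covars_test` are assumed names, not from the source):
post = model.infExact_scipy_post(K, covars, y, sig2e, fixedEffects)
mu_test = covars_test.dot(fixedEffects) + Ks.dot(post['alpha'])   # predictive mean: fixed effects + K_* @ alpha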
def linear_hotelling_test(X, Y, reg=0):
n, p = X.shape
Z = X - Y
Z_bar = Z.mean(axis=0)
Z -= Z_bar
S = Z.T.dot(Z)
S /= (n - 1)
if reg:
S.flat[::p + 1] += reg  # add the ridge term to the diagonal of S
# z' inv(S) z = z' inv(L L') z = z' inv(L)' inv(L) z = ||inv(L) z||^2
L = linalg.cholesky(S, lower=True, overwrite_a=True)
Linv_Z_bar = linalg.solve_triangular(L, Z_bar, lower=True, overwrite_b=True)
stat = n * Linv_Z_bar.dot(Linv_Z_bar)
p_val = stats.chi2.sf(stat, p)
return p_val, stat
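A minimal usage sketch for the test above (assuming `numpy as np` and `from scipy import linalg, stats` at module level, as the function requires):
rng = np.random.RandomState(0)
X = rng.randn(100, 3)
Y = X + 0.1 * rng.randn(100, 3)            # paired samples with a small perturbation
p_val, stat = linear_hotelling_test(X, Y, reg=1e-6)
print(p_val, stat)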
def compute_gradient_totalcverr_wrt_lambda(self,matrix_results,lambda_val,sigmasq_z):
# 0: K_tst_tr; 1: K_tr_tr; 2: D_tst_tr; 3: D_tr_tr
num_sample_cv = self.num_samples
ttl_num_folds = np.shape(matrix_results)[1]
gradient_cverr_per_fold = np.zeros(ttl_num_folds)
for jj in range(ttl_num_folds):
uu = np.shape(matrix_results[3][jj])[0] # number of training samples
M_tst_tr = exp(matrix_results[2][jj]*float(-1/2)*sigmasq_z**(-1))
M_tr_tr = exp(matrix_results[3][jj]*float(-1/2)*sigmasq_z**(-1))
lower_ZZ = cholesky(M_tr_tr+ lambda_val*eye(uu), lower=True)
ZZ = cho_solve((lower_ZZ,True),eye(uu))
first_term = matrix_results[0][jj].dot(ZZ.dot(ZZ.dot(M_tst_tr.T)))
second_term = M_tst_tr.dot(ZZ.dot(ZZ.dot(
matrix_results[1][jj].dot(ZZ.dot(M_tst_tr.T)))))
gradient_cverr_per_fold[jj] = trace(first_term-second_term)
return 2*sum(gradient_cverr_per_fold)/float(num_sample_cv)
# lambda = exp(eta)
def compute_gradient_totalcverr_wrt_sqsigma(self,matrix_results,lambda_val,sigmasq_z):
# 0: K_tst_tr; 1: K_tr_tr; 2: D_tst_tr; 3: D_tr_tr
num_sample_cv = self.num_samples
ttl_num_folds = np.shape(matrix_results)[1]
gradient_cverr_per_fold = np.zeros(ttl_num_folds)
for jj in range(ttl_num_folds):
uu = np.shape(matrix_results[3][jj])[0]
log_M_tr_tst = matrix_results[2][jj].T*float(-1/2)*sigmasq_z**(-1)
M_tr_tst = exp(log_M_tr_tst)
log_M_tr_tr = matrix_results[3][jj]*float(-1/2)*sigmasq_z**(-1)
M_tr_tr = exp(log_M_tr_tr)
lower_ZZ = cholesky(M_tr_tr+ lambda_val*eye(uu), lower=True)
ZZ = cho_solve((lower_ZZ,True),eye(uu))
term_1 = matrix_results[0][jj].dot(ZZ.dot((M_tr_tr*sigmasq_z**(-1)*(-log_M_tr_tr)).dot(ZZ.dot(M_tr_tst))))
term_2 = -matrix_results[0][jj].dot(ZZ.dot(M_tr_tst*(-log_M_tr_tst*sigmasq_z**(-1))))
term_3 = (sigmasq_z**(-1)*(M_tr_tst.T)*(-log_M_tr_tst.T)).dot(ZZ.dot(matrix_results[1][jj].dot(ZZ.dot(M_tr_tst))))
term_4 = -(M_tr_tst.T).dot(ZZ.dot((M_tr_tr*sigmasq_z**(-1)*(-log_M_tr_tr)).dot(ZZ.dot(matrix_results[1][jj].dot(
ZZ.dot(M_tr_tst))))))
term_5 = -(M_tr_tst.T).dot(ZZ.dot(matrix_results[1][jj].dot(ZZ.dot((M_tr_tr*sigmasq_z**(-1)*(-log_M_tr_tr)).dot(
ZZ.dot(M_tr_tst))))))
term_6 = (M_tr_tst.T).dot(ZZ.dot(matrix_results[1][jj].dot(ZZ.dot(M_tr_tst*sigmasq_z**(-1)*(-log_M_tr_tst)))))
gradient_cverr_per_fold[jj] = trace(2*term_1 + 2*term_2 + term_3 + term_4 + term_5 + term_6)
return sum(gradient_cverr_per_fold)/float(num_sample_cv)
def compute_totalcverr(self,matrix_results,lambda_val,sigmasq_z):
# 0: K_tst_tr; 1: K_tr_tr; 2: K_tst_tst; 3: D_tst_tr; 4: D_tr_tr
num_sample_cv = self.num_samples
ttl_num_folds = np.shape(matrix_results)[1]
cverr_per_fold = np.zeros(ttl_num_folds)
for jj in range(ttl_num_folds):
uu = np.shape(matrix_results[4][jj])[0] # number of training samples
M_tst_tr = exp(matrix_results[3][jj]*float(-1/2)*sigmasq_z**(-1))
M_tr_tr = exp(matrix_results[4][jj]*float(-1/2)*sigmasq_z**(-1))
lower_ZZ = cholesky(M_tr_tr+ lambda_val*eye(uu), lower=True)
ZZ = cho_solve((lower_ZZ,True),eye(uu))
first_term = matrix_results[2][jj]
second_term = - matrix_results[0][jj].dot(ZZ.dot(M_tst_tr.T))
third_term = np.transpose(second_term)
fourth_term = M_tst_tr.dot(ZZ.dot(
matrix_results[1][jj].dot(ZZ.dot(M_tst_tr.T))))
cverr_per_fold[jj] = trace(first_term + second_term + third_term + fourth_term)
return sum(cverr_per_fold)/float(num_sample_cv)
def predict(self, a_hist, t):
"""
This function implements the prediction formula discussed in section 6 (1.59).
It takes a realization for a^N, and the period in which the prediction is formed.
Output: E[abar | a_t, a_{t-1}, ..., a_1, a_0]
"""
N = np.asarray(a_hist).shape[0] - 1
a_hist = np.asarray(a_hist).reshape(N + 1, 1)
V = self.construct_V(N + 1)
aux_matrix = np.zeros((N + 1, N + 1))
aux_matrix[:(t + 1), :(t + 1)] = np.eye(t + 1)
L = la.cholesky(V).T
Ea_hist = la.inv(L) @ aux_matrix @ L @ a_hist
return Ea_hist
def _kalman_correct(x, P, z, H, R, gain_factor, gain_curve):
PHT = np.dot(P, H.T)
S = np.dot(H, PHT) + R
e = z - H.dot(x)
L = cholesky(S, lower=True)
inn = solve_triangular(L, e, lower=True)
if gain_curve is not None:
q = (np.dot(inn, inn) / inn.shape[0]) ** 0.5
f = gain_curve(q)
if f == 0:
return inn
L *= (q / f) ** 0.5
K = cho_solve((L, True), PHT.T, overwrite_b=True).T
if gain_factor is not None:
K *= gain_factor[:, None]
U = -K.dot(H)
U[np.diag_indices_from(U)] += 1
x += K.dot(z - H.dot(x))
P[:] = U.dot(P).dot(U.T) + K.dot(R).dot(K.T)
return inn
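A rough standalone call of the helper above with made-up values (illustrative only; assumes `numpy as np` and the `cholesky`, `solve_triangular`, `cho_solve` imports the function itself uses):
x = np.array([0.0, 1.0])                   # state estimate (updated in place)
P = np.diag([1.0, 2.0])                    # state covariance (updated in place)
H = np.array([[1.0, 0.0]])                 # measurement model
R = np.array([[0.5]])                      # measurement noise covariance
z = np.array([0.3])                        # observed measurement
innovation = _kalman_correct(x, P, z, H, R, gain_factor=None, gain_curve=None)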
def factor(X, rho):
"""
computes cholesky factorization of the kernel K = 1/rho*XX^T + I
Input:
X design matrix: n_s x n_f (we assume n_s << n_f)
rho: regularizaer
Output:
L lower triangular matrix
U upper triangular matrix
"""
n_s, n_f = X.shape
K = 1 / rho * scipy.dot(X, X.T) + scipy.eye(n_s)
U = linalg.cholesky(K)
return U
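A small sketch of how the returned factor might be used to solve (1/rho*XX^T + I) v = b (illustrative; assumes `numpy as np` in addition to the `scipy`/`linalg` imports the function relies on):
X = np.random.randn(5, 50)                 # n_s x n_f with n_s << n_f
b = np.random.randn(5)
U = factor(X, rho=2.0)
v = linalg.cho_solve((U, False), b)        # False: U is the upper Cholesky factor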
# test_unconstrained.py -- project: hamiltonian-monte-carlo, author: matt-graham
def test_kinetic_energy(self):
mom_resample_coeff = 1.
dtype = np.float64
for n_dim in [10, 100, 1000]:
mass_matrix = self.prng.normal(size=(n_dim, n_dim))
mass_matrix = mass_matrix.dot(mass_matrix.T)
mass_matrix_chol = la.cholesky(mass_matrix, lower=True)
sampler = uhmc.EuclideanMetricHmcSampler(
energy_func=energy_func,
mass_matrix=mass_matrix,
energy_grad=energy_grad,
prng=self.prng,
mom_resample_coeff=mom_resample_coeff,
dtype=dtype)
pos, mom = self.prng.normal(size=(2, n_dim,)).astype(dtype)
k_energy = sampler.kinetic_energy(pos, mom, {})
assert np.isscalar(k_energy), (
'kinetic_energy returning non-scalar value.'
)
assert np.allclose(
k_energy,
0.5 * mom.dot(la.cho_solve((mass_matrix_chol, True), mom))), (
'kinetic_energy returning incorrect value.'
)
def TrainSSHNOPL(data, dataL, adjMat, nbit, eta, rau):
datamean=npy.mean(data, axis=0)
data=data-datamean
dataL=dataL-datamean
covMatU=npy.dot(data.T, data)
covMatL=npy.dot(dataL.T, npy.dot(adjMat, dataL))
covMat=eta*covMatU+(1-eta)*covMatL
eigVals, eigVecs=npy.linalg.eig(covMat)
idxSort=npy.argsort(npy.abs(eigVals))
rau=npy.max([rau, npy.max([0, -npy.abs(eigVals[idxSort[0]])])])
covMatReg=(covMat+rau*npy.eye(covMat.shape[0]))
matL= linalg.cholesky(covMatReg, lower=False)
nbit=npy.min([nbit,data.shape[1]])
projMat=npy.dot(matL, eigVecs[:,idxSort[-1:-nbit-1:-1]])
modelSSH={"datamean":datamean, "projMat":projMat}
return modelSSH
def test_step_M(self,window,update):
points = np.random.randn(self.n_points,self.dim)
GM = GaussianMixture(self.n_components,window=window,update=update)
GM.initialize(points)
_,log_resp = GM._step_E(points[:GM.get('window')])
GM._sufficient_statistics(points[:GM.get('window')],log_resp)
log_weights = np.log(GM.get('N'))
means = GM.get('X') / GM.get('N')[:,np.newaxis]
cov = GM.get('S') / GM.get('N')[:,np.newaxis,np.newaxis]
cov_chol = np.empty_like(cov)
for i in range(self.n_components):
cov_chol[i] = linalg.cholesky(cov[i],lower=True)
GM._step_M()
assert_almost_equal(log_weights,GM.get('log_weights'))
assert_almost_equal(means,GM.get('means'))
assert_almost_equal(cov,GM.get('cov'))
assert_almost_equal(cov_chol,GM.get('cov_chol'))
def test_log_normal_matrix_full():
n_points, n_components, n_features = 10,5,2
points = np.random.randn(n_points,n_features)
means = np.random.randn(n_components,n_features)
cov = generate.generate_covariance_matrices_full(n_components,n_features)
cov_chol = np.empty((n_components,n_features,n_features))
for i in range(n_components):
cov_chol[i] = linalg.cholesky(cov[i],lower=True)
# Beginning of the test
log_det_cov = np.log(np.linalg.det(cov))
precisions = np.linalg.inv(cov)
log_prob = np.empty((n_points,n_components))
for i in range(n_components):
diff = points - means[i]
y = np.dot(diff,np.dot(precisions[i],diff.T))
log_prob[:,i] = np.diagonal(y)
expected_log_normal_matrix = -0.5 * (n_features * np.log(2*np.pi) +
log_prob + log_det_cov)
predicted_log_normal_matrix = _log_normal_matrix(points,means,cov_chol,'full')
assert_almost_equal(expected_log_normal_matrix,predicted_log_normal_matrix)
def fit(self, X, y):
"""Fit the Bayesian linear regression model leveraging the available
data.
Parameters:
X (numpy array): A two-dimensional numpy array representing the
matrix of covariates. Note that if a bias term is expressly
desired, it must be included in the design matrix.
y (numpy array): A matrix of target variables to predict.
"""
P = X.T.dot(X) + self.prior_prec
L = spla.cholesky(P, lower=True)
self.mu = spla.cho_solve((L, True), X.T.dot(y))
self.sigma_sq = np.mean((X.dot(self.mu) - y) ** 2)
L_inv = spla.solve_triangular(L.T, np.eye(L.shape[0]))
self.cov = self.sigma_sq * L_inv.dot(L_inv.T)
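The fitted attributes lend themselves to a posterior predictive step; a hypothetical helper (not part of the original class, assuming `numpy as np`) could look like:
def predict_mean_var(self, X_new):
    # Posterior predictive mean and marginal variance under the fitted model.
    mean = X_new.dot(self.mu)
    var = self.sigma_sq + np.sum(X_new.dot(self.cov) * X_new, axis=1)
    return mean, var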
def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7):
"""Log probability for full covariance matrices."""
n_samples, n_dim = X.shape
nmix = len(means)
log_prob = np.empty((n_samples, nmix))
for c, (mu, cv) in enumerate(zip(means, covars)):
try:
cv_chol = linalg.cholesky(cv, lower=True)
except linalg.LinAlgError:
# The model is most probably stuck in a component with too
# few observations; we need to reinitialize this component
try:
cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
lower=True)
except linalg.LinAlgError:
raise ValueError("'covars' must be symmetric, "
"positive-definite")
cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
n_dim * np.log(2 * np.pi) + cv_log_det)
return log_prob
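For illustration, the helper above can be called directly on toy data (assumes `numpy as np` and `from scipy import linalg`):
X = np.random.randn(20, 3)
means = np.random.randn(2, 3)
covars = np.array([np.eye(3), 2.0 * np.eye(3)])
log_prob = _log_multivariate_normal_density_full(X, means, covars)   # shape (20, 2)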
def factor(X, rho, mu=0.0):
n, d = X.shape
if n >= d:
L = la.cholesky((2. / n) * np.dot(X.T, X) + (2. * mu + rho) * np.eye(d), lower=True)
else:
L = la.cholesky(np.eye(n) + (2. / (rho * n)) * np.dot(X, X.T), lower=True)
return L, L.T # L, U
def matrixInverse(M):
return chol2inv(spla.cholesky(M, lower=False))
def predict(self, r, rnext, wf0, Cf, dt):
"""
r: Desired reference state at time t
rnext: Desired reference state at time t+dt
wf0: Mean of the process noise
Cf: Covariance of the process noise
dt: Timestep to predict forward
Progresses the state estimate one dt into the future.
Returns the control effort u.
"""
# Compute control action
self.u = self.g(r, rnext, self.x, self.Cx, dt)
# Store relevant parameters
utp = self.utpDict['_f']
# Compute sigma points, propagate through process, and store tangent-space representation
M = spl.cholesky(utp[0]*spl.block_diag(self.Cx, Cf))
s0 = np.append(self.x, wf0)
fS = [self.sf(s0, dt)]
fT_sum = np.zeros(self.n_m)
for Vi in np.vstack((M, -M)):
fS.append(self.sf(self.sxplus(s0, Vi), dt))
fT_sum += self.xminus(fS[-1], fS[0])
# Determine the mean of the propagated sigma points from the tangent vectors
self.x = self.xplus(fS[0], utp[2]*fT_sum)
# Determine the covariance from the tangent-space deviations from the mean
fv0 = self.xminus(fS[0], self.x)
fPi_sum = np.zeros((self.n_m, self.n_m))
for fSi in fS[1:]:
fv = self.xminus(fSi, self.x)
fPi_sum += np.outer(fv, fv)
self.Cx = utp[3]*np.outer(fv0, fv0) + utp[2]*fPi_sum
# Over and out
return np.copy(self.u)
def correct(self, sensor, z, wh0, Ch):
"""
sensor: String of the sensor key name
z: Value of the measurement
wh0: Mean of that sensor's noise
Ch: Covariance of that sensor's noise
Updates the state estimate with the given sensor measurement.
"""
# Store relevant functions and parameters
h, zminus, n_z, _, utp = self.hDict_full[sensor]
# Compute sigma points and emulate their measurement error
M = spl.cholesky(utp[0]*spl.block_diag(self.Cx, Ch))
V = np.vstack((M, -M))
s0 = np.append(self.x, wh0)
hE = [zminus(z, self.sh(s0, h))]
for i, Vi in enumerate(V):
hE.append(zminus(z, self.sh(self.sxplus(s0, Vi), h)))
hE = np.array(hE)
# Determine the mean of the sigma measurement errors
e0 = utp[1]*hE[0] + utp[2]*np.sum(hE[1:], axis=0)
# Determine the covariance and cross-covariance
hV = hE - e0
hPi_sum = np.zeros((n_z, n_z))
hPci_sum = np.zeros((self.n_m, n_z))
for Vqi, hVi in zip(V[:, :self.n_m], hV[1:]):
hPi_sum += np.outer(hVi, hVi)
hPci_sum += np.outer(Vqi, hVi)
Pzz = utp[3]*np.outer(hV[0], hV[0]) + utp[2]*hPi_sum
Pxz = utp[2]*hPci_sum
# Kalman update the state estimate and covariance
K = Pxz.dot(npl.inv(Pzz))
self.x = self.xplus(self.x, -K.dot(e0))
self.Cx = self.Cx - K.dot(Pzz).dot(K.T)
def sigma_points(self, X, P):
sigmas = np.zeros((2 * self.n + 1, self.n))
U = cholesky((self.n + self.l) * P)
sigmas[0] = X
for k in range (self.n):
sigmas[k + 1] = X + U[k]
sigmas[self.n + k + 1] = X - U[k]
return sigmas
def __init__(self,gamma,beta,nugget,kernelName,k_lambda,xTrain,yTrain):
"""
Create a new GaussianProcess Object
gamma: Hyperparameter
beta: Hyperparameter
k_lambda: Hyperparameter
nugget: The noise hyperparameter
kernelName: The name of the covariance kernel
xTrain: Numpy array containing x training values
yTrain: Numpy array containing y training values
"""
self.xTrain = xTrain
self.yTrain = yTrain
self.k_lambda = k_lambda
self.beta = beta
self.gamma = gamma
self.nugget = nugget
self.kernelName = kernelName
# Setup the regressor as if gp.fit had been called
# See https://github.com/scikit-learn/scikit-learn/master/sklearn/gaussian_process/gpr.py
kernel = self._getKernel()
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=0)
gp.K = kernel(xTrain)
gp.X_train_ = xTrain
gp.y_train_ = yTrain
gp.L_ = cholesky(gp.K, lower=True)
gp.alpha_ = cho_solve((gp.L_, True), yTrain)
gp.fit(xTrain,yTrain)
gp.kernel_ = kernel
self.gp = gp
self.kernel = kernel
# Calculate the matrix inverses once. Save time later
# This is only used for our own implementation of the scoring engine
self.L_inv = solve_triangular(gp.L_.T, np.eye(gp.L_.shape[0]))
self.K_inv = self.L_inv.dot(self.L_inv.T)
def fit(self, X, y):
"""
Fits a Gaussian Process regressor
Parameters
----------
X: np.ndarray, shape=(nsamples, nfeatures)
Training instances to fit the GP.
y: np.ndarray, shape=(nsamples,)
Corresponding continuous target values to X.
"""
self.X = X
self.y = y
self.nsamples = self.X.shape[0]
if self.optimize:
grads = None
if self.usegrads:
grads = self._grad
self.optHyp(param_key=self.covfunc.parameters, param_bounds=self.covfunc.bounds, grads=grads)
self.K = self.covfunc.K(self.X, self.X)
self.L = cholesky(self.K).T
self.alpha = solve(self.L.T, solve(self.L, y - self.mprior))
self.logp = -.5 * np.dot(self.y, self.alpha) - np.sum(np.log(np.diag(self.L))) - self.nsamples / 2 * np.log(
2 * np.pi)
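The fitted quantities above support the standard GP predictive mean; a hypothetical sketch (not part of the original class, assuming the same `covfunc` interface) could look like:
def predict_mean(self, X_star):
    # Hypothetical sketch: predictive mean mprior + K(X_*, X) @ alpha,
    # reusing the factors computed in fit().
    K_star = self.covfunc.K(X_star, self.X)
    return self.mprior + K_star.dot(self.alpha)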
def param_grad(self, k_param):
"""
Returns gradient over hyperparameters. It is recommended to use `self._grad` instead.
Parameters
----------
k_param: dict
Dictionary with keys being hyperparameters and values their queried values.
Returns
-------
np.ndarray
Gradient corresponding to each hyperparameters. Order given by `k_param.keys()`
"""
k_param_key = list(k_param.keys())
covfunc = self.covfunc.__class__(**k_param)
n = self.X.shape[0]
K = covfunc.K(self.X, self.X)
L = cholesky(K).T
alpha = solve(L.T, solve(L, self.y))
inner = np.dot(np.atleast_2d(alpha).T, np.atleast_2d(alpha)) - np.linalg.inv(K)
grads = []
for param in k_param_key:
gradK = covfunc.gradK(self.X, self.X, param=param)
gradK = .5 * np.trace(np.dot(inner, gradK))
grads.append(gradK)
return np.array(grads)
def compute_gradient_totalcverr_wrt_eta(self,matrix_results,lambda_val,sigmasq_z):
# 0: K_tst_tr; 1: K_tr_tr; 2: D_tst_tr; 3: D_tr_tr
#eta = log(lambda_val)
#gamma = log(sigmasq_z)
num_sample_cv = self.num_samples
ttl_num_folds = np.shape(matrix_results)[1]
gradient_cverr_per_fold = np.zeros(ttl_num_folds)
for jj in range(ttl_num_folds):
uu = np.shape(matrix_results[3][jj])[0] # number of training samples
M_tst_tr = exp(matrix_results[2][jj]*float(-1/2)*sigmasq_z**(-1))
M_tr_tr = exp(matrix_results[3][jj]*float(-1/2)*sigmasq_z**(-1))
lower_ZZ = cholesky(M_tr_tr+ lambda_val*eye(uu), lower=True)
ZZ = cho_solve((lower_ZZ,True),eye(uu))
EE = lambda_val*eye(uu)
first_term = matrix_results[0][jj].dot(ZZ.dot(EE.dot(ZZ.dot(M_tst_tr.T))))
second_term = first_term.T
third_term = -M_tst_tr.dot(ZZ.dot(EE.dot(ZZ.dot(
matrix_results[1][jj].dot(ZZ.dot(M_tst_tr.T))))))
fourth_term = -M_tst_tr.dot(ZZ.dot(
matrix_results[1][jj].dot(ZZ.dot(EE.dot(ZZ.dot(M_tst_tr.T))))))
gradient_cverr_per_fold[jj] = trace(first_term + second_term + third_term + fourth_term)
return sum(gradient_cverr_per_fold)/float(num_sample_cv)
# compute the gradient of the total cverror with respect to sigma_z squared
def compute_gradient_totalcverr_wrt_gamma(self,matrix_results,lambda_val,sigmasq_z):
# 0: K_tst_tr; 1: K_tr_tr; 2: D_tst_tr; 3: D_tr_tr
#eta = log(lambda_val)
#gamma = log(sigmasq_z)
num_sample_cv = self.num_samples
ttl_num_folds = np.shape(matrix_results)[1]
gradient_cverr_per_fold = np.zeros(ttl_num_folds)
for jj in range(ttl_num_folds):
uu = np.shape(matrix_results[3][jj])[0]
log_M_tr_tst = matrix_results[2][jj].T*float(-1/2)*sigmasq_z**(-1)
M_tr_tst = exp(log_M_tr_tst)
log_M_tr_tr = matrix_results[3][jj]*float(-1/2)*sigmasq_z**(-1)
M_tr_tr = exp(log_M_tr_tr)
lower_ZZ = cholesky(M_tr_tr+ lambda_val*eye(uu), lower=True)
ZZ = cho_solve((lower_ZZ,True),eye(uu))
term_1 = matrix_results[0][jj].dot(ZZ.dot((M_tr_tr*(-log_M_tr_tr)).dot(ZZ.dot(M_tr_tst))))
term_2 = -matrix_results[0][jj].dot(ZZ.dot(M_tr_tst*(-log_M_tr_tst)))
term_3 = (M_tr_tst.T*(-log_M_tr_tst).T).dot(ZZ.dot(matrix_results[1][jj].dot(ZZ.dot(M_tr_tst))))
term_4 = -(M_tr_tst.T).dot(ZZ.dot((M_tr_tr*(-log_M_tr_tr)).dot(ZZ.dot(matrix_results[1][jj].dot(
ZZ.dot(M_tr_tst))))))
term_5 = -(M_tr_tst.T).dot(ZZ.dot(matrix_results[1][jj].dot(ZZ.dot((M_tr_tr*(-log_M_tr_tr)).dot(
ZZ.dot(M_tr_tst))))))
term_6 = (M_tr_tst.T).dot(ZZ.dot(matrix_results[1][jj].dot(ZZ.dot(M_tr_tst*(-log_M_tr_tst)))))
gradient_cverr_per_fold[jj] = trace(2*term_1 + 2*term_2 + term_3 + term_4 + term_5 + term_6)
return sum(gradient_cverr_per_fold)/float(num_sample_cv)
# compute the total cverror
def expCovar(xs,kappa,chol=False):
'''
Given a spatial mesh x, return an exponential
covariance matrix with inverse correlation length
kappa.
'''
x,y = np.meshgrid(xs,xs)
rv = np.exp(-1.0*kappa*abs(x-y))
if chol:
return spal.cholesky(rv)
return rv
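A short sketch of drawing a correlated Gaussian sample from the factor returned when chol=True (note scipy's cholesky default gives the upper factor R with C = R^T R; assumes `numpy as np` and `scipy.linalg as spal` as above):
xs = np.linspace(0.0, 1.0, 50)
R = expCovar(xs, kappa=5.0, chol=True)         # upper Cholesky factor of the covariance
sample = R.T.dot(np.random.randn(len(xs)))     # one draw with covariance exp(-kappa*|x - y|)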
def _rts_pass(x, P, xa, Pa, Phi):
n_points, n_states = x.shape
I = np.identity(n_states)
for i in reversed(range(n_points - 1)):
L = cholesky(Pa[i + 1], check_finite=False)
Pa_inv = cho_solve((L, False), I, check_finite=False)
C = P[i].dot(Phi[i].T).dot(Pa_inv)
x[i] += C.dot(x[i + 1] - xa[i + 1])
P[i] += C.dot(P[i + 1] - Pa[i + 1]).dot(C.T)
return x, P
def min_div_est(self, LU, nframes):
Lu = np.zeros((self.tv_dim, self.tv_dim))
Lu[self.itril] = LU.sum(0) / nframes
Lu += np.tril(Lu, -1).T
self.Tm = cholesky(Lu).dot(self.Tm)
def ssgpr(self, X, kern, S, Y):
[N, D] = X.shape
m = len(S) // D  # S stacks m spectral frequency vectors of length D
W = np.reshape(S, (m, D), order='F')
phi = np.dot(X, W.T)
phi = np.hstack((np.cos(phi), np.sin(phi)))
A = np.dot(phi.T, phi) + kern.noise*np.identity(2*m)
R = cholesky(A, lower=False)
PhiRi = np.linalg.lstsq(R.T, phi.T)[0] # PhiRi = phi/R
Rtphity = np.dot(PhiRi, Y.flatten())
return 0.5/kern.noise*(np.sum(np.power(Y,2))-kern.noise/m*np.sum(np.power(Rtphity,2))) + np.sum(np.log(np.diag(R))) + (N/2 - m)*np.log(kern.noise)+N/2*np.log(2*np.pi)