def compute_gradient_totalcverr_wrt_lambda(self, matrix_results, lambda_val, sigmasq_z):
    """Return the gradient of the total cross-validation error w.r.t. lambda.

    Per fold jj, ``matrix_results`` holds four precomputed matrices:
        matrix_results[0][jj] -> K_tst_tr  (kernel, test x train)
        matrix_results[1][jj] -> K_tr_tr   (kernel, train x train)
        matrix_results[2][jj] -> D_tst_tr  (squared distances, test x train)
        matrix_results[3][jj] -> D_tr_tr   (squared distances, train x train)

    ``sigmasq_z`` is the Gaussian-kernel bandwidth used to turn the distance
    matrices into kernel matrices; ``lambda_val`` is the ridge penalty.  The
    per-fold trace terms are summed and normalised by ``self.num_samples``.
    """
    # NOTE(review): under Python 2, float(-1/2) floors to -1.0 rather than
    # -0.5 — confirm the intended interpreter.  Hoisting the scalar out of
    # the loop is bit-exact either way (multiplying by a power of two is
    # exact in IEEE arithmetic).
    scale = float(-1 / 2) * sigmasq_z ** (-1)
    num_folds = np.shape(matrix_results)[1]
    per_fold_gradient = np.zeros(num_folds)
    for fold in range(num_folds):
        # Number of training samples in this fold.
        n_train = np.shape(matrix_results[3][fold])[0]
        # Gaussian kernels from the precomputed squared-distance matrices.
        M_tst_tr = exp(matrix_results[2][fold] * scale)
        M_tr_tr = exp(matrix_results[3][fold] * scale)
        # inv_reg = (M_tr_tr + lambda * I)^{-1}, via Cholesky factorisation
        # of the (symmetric positive-definite) regularised kernel matrix.
        chol_lower = cholesky(M_tr_tr + lambda_val * eye(n_train), lower=True)
        inv_reg = cho_solve((chol_lower, True), eye(n_train))
        # Two trace terms of the analytic derivative of the CV residual.
        term_a = matrix_results[0][fold].dot(
            inv_reg.dot(inv_reg.dot(M_tst_tr.T)))
        term_b = M_tst_tr.dot(inv_reg.dot(inv_reg.dot(
            matrix_results[1][fold].dot(inv_reg.dot(M_tst_tr.T)))))
        per_fold_gradient[fold] = trace(term_a - term_b)
    return 2 * sum(per_fold_gradient) / float(self.num_samples)
# lambda = exp(eta)
# (scraped-page residue, commented out so the file parses:
#  "评论列表" = comment list, "文章目录" = article table of contents)