def compute_gradient_totalcverr_wrt_eta(self, matrix_results, lambda_val, sigmasq_z):
    """Gradient of the total cross-validation error w.r.t. eta = log(lambda_val).

    Per-fold precomputed matrices in ``matrix_results`` (first index):
        0: K_tst_tr; 1: K_tr_tr; 2: D_tst_tr; 3: D_tr_tr
    where K_* are kernel matrices and D_* squared-distance matrices
    (test-vs-train and train-vs-train respectively); second index is the fold.

    Parameters
    ----------
    matrix_results : nested sequence, shape (4, n_folds)
        Per-fold matrices as described above (2-D arrays).
    lambda_val : float
        Ridge regularisation strength; eta = log(lambda_val).
    sigmasq_z : float
        Squared Gaussian-kernel bandwidth for the Z kernel.

    Returns
    -------
    float
        Sum over folds of the per-fold gradient traces, divided by the
        total number of CV samples (self.num_samples).
    """
    num_sample_cv = self.num_samples
    ttl_num_folds = np.shape(matrix_results)[1]
    gradient_cverr_per_fold = np.zeros(ttl_num_folds)
    for jj in range(ttl_num_folds):
        uu = np.shape(matrix_results[3][jj])[0]  # number of training samples
        # Gaussian kernel matrices from squared distances: exp(-D / (2*sigmasq_z)).
        # NOTE: the original wrote float(-1/2), which evaluates to -1.0 under
        # Python 2 integer division (wrong kernel); -0.5 is the intended constant.
        M_tst_tr = np.exp(matrix_results[2][jj] * (-0.5 / sigmasq_z))
        M_tr_tr = np.exp(matrix_results[3][jj] * (-0.5 / sigmasq_z))
        # ZZ = (M_tr_tr + lambda*I)^{-1}, computed via a Cholesky solve for stability.
        lower_ZZ = cholesky(M_tr_tr + lambda_val * np.eye(uu), lower=True)
        ZZ = cho_solve((lower_ZZ, True), np.eye(uu))
        # d(lambda*I)/d(eta) = lambda*I, since eta = log(lambda).
        EE = lambda_val * np.eye(uu)
        # Hoist sub-products shared by several terms (same math, fewer matmuls).
        ZZ_Mt = ZZ.dot(M_tst_tr.T)                 # ZZ @ M_tst_tr^T
        ZZ_EE_ZZ_Mt = ZZ.dot(EE.dot(ZZ_Mt))        # ZZ @ EE @ ZZ @ M_tst_tr^T
        first_term = matrix_results[0][jj].dot(ZZ_EE_ZZ_Mt)
        second_term = first_term.T
        third_term = -M_tst_tr.dot(
            ZZ.dot(EE.dot(ZZ.dot(matrix_results[1][jj].dot(ZZ_Mt)))))
        fourth_term = -M_tst_tr.dot(
            ZZ.dot(matrix_results[1][jj].dot(ZZ_EE_ZZ_Mt)))
        gradient_cverr_per_fold[jj] = np.trace(
            first_term + second_term + third_term + fourth_term)
    return np.sum(gradient_cverr_per_fold) / float(num_sample_cv)
# compute the gradient of the total cverror with respect to sigma_z squared