def solve(self, x, w):
    # Check that w is a vector or an n x 1 matrix
    if w.ndim == 2:
        assert w.shape[1] == 1
    elif w.ndim == 1:
        w = w.reshape(w.shape[0], 1)
    A_smooth = (self.Dm - self.Dn.dot(self.grid.blur(self.Dn)))
    w_splat = self.grid.splat(w)
    A_data = diags(w_splat[:, 0], 0)
    A = self.params["lam"] * A_smooth + A_data
    xw = x * w
    b = self.grid.splat(xw)
    # Use a simple Jacobi (diagonal) preconditioner
    A_diag = np.maximum(A.diagonal(), self.params["A_diag_min"])
    M = diags(1 / A_diag, 0)
    # Flat initialization: per-cell weighted average of the splatted signal
    y0 = self.grid.splat(xw) / w_splat
    yhat = np.empty_like(y0)
    for d in xrange(x.shape[-1]):
        yhat[..., d], info = cg(A, b[..., d], x0=y0[..., d], M=M,
                                maxiter=self.params["cg_maxiter"],
                                tol=self.params["cg_tol"])
    xhat = self.grid.slice(yhat)
    return xhat
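For reference, a minimal standalone sketch of the same pattern (Jacobi-preconditioned CG on a sparse SPD system) using only numpy and scipy; the matrix, density, and tolerances below are made up for illustration and are not from the snippet above:

import numpy as np
from scipy.sparse import diags, random as sparse_random
from scipy.sparse.linalg import cg

# Build a small symmetric positive definite sparse matrix (illustrative only)
n = 200
R = sparse_random(n, n, density=0.02, format="csr", random_state=0)
A = R.T.dot(R) + diags(np.full(n, 1.0))
b = np.random.RandomState(0).randn(n)

# Jacobi preconditioner: approximate inv(A) by 1 / diag(A)
M = diags(1.0 / np.maximum(A.diagonal(), 1e-8))

x, info = cg(A, b, M=M, tol=1e-6, maxiter=500)
print("converged" if info == 0 else "info=%d" % info)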
def ordinaryWalk(self):
    tick = time.time()
    alpha = self.param['alpha']
    n = self.graph.shape[0]
    c = self.Y.shape[1]
    nf = self.param['normalize_factor']
    #self.graph = self.graph + 3000 * sparse.eye(n, n)
    # Symmetrically normalized affinity: S = D^{-1/2} W D^{-1/2}
    Di = sparse.diags([np.sqrt(1 / (self.graph.sum(axis=0) + nf * np.ones(n))).getA1()], [0])
    S = Di.dot(self.graph.dot(Di))
    S_iter = (sparse.eye(n) - alpha * S).tocsc()
    F = np.zeros((n, c))
    # Solve (I - alpha*S) F = Y one label column at a time with CG
    for i in range(c):
        F[:, i], info = slin.cg(S_iter, self.Y[:, i], tol=1e-12)
    toc = time.time()
    #print np.where(F > 0)
    self.ElapsedTime = toc - tick
    self.PredictedProbs = F
def variantWalk(self):
    tick = time.time()
    alpha = self.param['alpha']
    n = self.graph.shape[0]
    c = self.Y.shape[1]
    nf = self.param['normalize_factor']
    # Unnormalized variant: solve (D - alpha*W) F = Y
    data = (self.graph.sum(axis=0) + nf * np.ones(n)).ravel()
    Di = sparse.spdiags(data, 0, n, n).tocsc()
    S_iter = (Di - alpha * self.graph).tocsc()
    F = np.zeros((n, c))
    for i in range(c):
        F[:, i], info = slin.cg(S_iter, self.Y[:, i], tol=1e-10)
    toc = time.time()
    self.ElapsedTime = toc - tick
    self.PredictedProbs = F
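Both walks reduce to a symmetric sparse solve, which is why plain CG suffices. A minimal sketch of the normalized variant on a toy graph; the adjacency matrix, alpha, and seed labels below are illustrative, not taken from the original class:

import numpy as np
import scipy.sparse as sparse
import scipy.sparse.linalg as slin

# Toy symmetric adjacency matrix (4 nodes) and one-hot seed labels
W = sparse.csr_matrix(np.array([[0, 1, 1, 0],
                                [1, 0, 1, 0],
                                [1, 1, 0, 1],
                                [0, 0, 1, 0]], dtype=float))
Y = np.array([[1, 0], [0, 0], [0, 0], [0, 1]], dtype=float)
alpha = 0.9

d = np.asarray(W.sum(axis=0)).ravel()
Di = sparse.diags(1.0 / np.sqrt(d))
S = Di.dot(W.dot(Di))                           # D^{-1/2} W D^{-1/2}
A = (sparse.eye(W.shape[0]) - alpha * S).tocsc()

F = np.zeros_like(Y)
for i in range(Y.shape[1]):
    F[:, i], info = slin.cg(A, Y[:, i], tol=1e-10)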
def ngstep(x0, obj0, objgrad0, obj_and_kl_func, hvpx0_func, max_kl, damping, max_cg_iter,
           enable_bt):
    '''
    Natural gradient step using Hessian-vector products

    Args:
        x0: current point
        obj0: objective value at x0
        objgrad0: grad of objective value at x0
        obj_and_kl_func: function mapping a point x to the objective and KL values
        hvpx0_func: function mapping a vector v to the KL Hessian-vector product H(x0)v
        max_kl: max KL divergence limit; triggers a line search
        damping: multiple of I to mix with Hessians for Hessian-vector products
        max_cg_iter: max conjugate gradient iterations for solving for the natural gradient step
        enable_bt: whether to run the backtracking line search
    '''
    assert x0.ndim == 1 and x0.shape == objgrad0.shape

    # Solve for the step direction
    damped_hvp_func = lambda v: hvpx0_func(v) + damping * v
    hvpop = ssl.LinearOperator(shape=(x0.shape[0], x0.shape[0]), matvec=damped_hvp_func)
    step, _ = ssl.cg(hvpop, -objgrad0, maxiter=max_cg_iter)
    fullstep = step / np.sqrt(.5 * step.dot(damped_hvp_func(step)) / max_kl + 1e-8)

    # Line search on the objective with a hard KL wall
    if not enable_bt:
        return x0 + fullstep, 0

    def barrierobj(p):
        obj, kl = obj_and_kl_func(p)
        return np.inf if kl > 2 * max_kl else obj

    xnew, num_bt_steps = btlinesearch(f=barrierobj, x0=x0, fx0=obj0, g=objgrad0, dx=fullstep,
                                      accept_ratio=.1, shrink_factor=.5, max_steps=10)
    return xnew, num_bt_steps
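The key point above is that CG never needs the Hessian explicitly, only Hessian-vector products wrapped in a LinearOperator. A minimal sketch with a made-up quadratic objective (the matrix H, damping, and gradient below are purely illustrative):

import numpy as np
import scipy.sparse.linalg as ssl

# Illustrative SPD "Hessian" and gradient for f(x) = 0.5 x^T H x - g^T x
rng = np.random.RandomState(0)
B = rng.randn(50, 50)
H = B.T.dot(B) + 1e-1 * np.eye(50)
g = rng.randn(50)

damping = 1e-3
hvp = lambda v: H.dot(v) + damping * v          # damped Hessian-vector product
hvpop = ssl.LinearOperator(shape=(50, 50), matvec=hvp)

# CG solves (H + damping*I) step = -g using only matvecs
step, info = ssl.cg(hvpop, -g, maxiter=10)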
def fit(self, Xt, yt):
    self.Xt = Xt
    x0 = np.zeros(Xt.shape[0])
    # Returns an approximate solution of the inner optimization
    K = pairwise_kernels(Xt, gamma=np.exp(self.alpha0[0]), metric='rbf')
    (out, info) = splinalg.cg(
        K + np.exp(self.alpha0[1]) * np.eye(x0.size), yt, x0=x0)
    # cg returns info == 0 on success, > 0 if it did not converge, < 0 on error
    if info != 0:
        raise ValueError
    self.dual_coef_ = out
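Once dual_coef_ is available, predictions on new points follow from the kernel between test and training data. A minimal sketch of the same dual solve outside the class, assuming pairwise_kernels comes from sklearn.metrics.pairwise; the kernel width and regularization values are made up:

import numpy as np
from scipy.sparse import linalg as splinalg
from sklearn.metrics.pairwise import pairwise_kernels

rng = np.random.RandomState(0)
Xt = rng.randn(100, 5)
yt = rng.randn(100)

gamma, lam = 0.1, 1.0                       # illustrative hyperparameters
K = pairwise_kernels(Xt, gamma=gamma, metric='rbf')
dual_coef, info = splinalg.cg(K + lam * np.eye(Xt.shape[0]), yt)

# Predict on new data: f(x) = sum_i dual_coef_i * k(x, x_i)
X_new = rng.randn(10, 5)
K_new = pairwise_kernels(X_new, Xt, gamma=gamma, metric='rbf')
y_pred = K_new.dot(dual_coef)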
def solvemdbi_cg(ah, rho, b, axisM, axisK, tol=1e-5, mit=1000, isn=None):
    r"""
    Solve a multiple diagonal block linear system with a scaled
    identity term using Conjugate Gradient (CG) via
    :func:`scipy.sparse.linalg.cg`.

    The solution is obtained by independently solving a set of linear
    systems of the form (see :cite:`wohlberg-2016-efficient`)

    .. math::
       (\rho I + \mathbf{a}_0 \mathbf{a}_0^H + \mathbf{a}_1 \mathbf{a}_1^H +
       \; \ldots \; + \mathbf{a}_{K-1} \mathbf{a}_{K-1}^H) \; \mathbf{x} =
       \mathbf{b}

    where each :math:`\mathbf{a}_k` is an :math:`M`-vector.
    The inner products and matrix products in this equation are taken
    along the M and K axes of the corresponding multi-dimensional arrays;
    the solutions are independent over the other axes.

    Parameters
    ----------
    ah : array_like
      Linear system component :math:`\mathbf{a}^H`
    rho : float
      Parameter rho
    b : array_like
      Linear system component :math:`\mathbf{b}`
    axisM : int
      Axis in input corresponding to index m in linear system
    axisK : int
      Axis in input corresponding to index k in linear system
    tol : float
      CG tolerance
    mit : int
      CG maximum iterations
    isn : array_like
      CG initial solution

    Returns
    -------
    x : ndarray
      Linear system solution :math:`\mathbf{x}`
    cgit : int
      Number of CG iterations
    """
    a = np.conj(ah)
    if isn is not None:
        isn = isn.ravel()
    Aop = lambda x: inner(ah, x, axis=axisM)
    AHop = lambda x: inner(a, x, axis=axisK)
    AHAop = lambda x: AHop(Aop(x))
    vAHAoprI = lambda x: AHAop(x.reshape(b.shape)).ravel() + rho * x.ravel()
    lop = LinearOperator((b.size, b.size), matvec=vAHAoprI, dtype=b.dtype)
    # Count CG iterations via a callback so the return value matches the docstring;
    # scipy's cg itself returns (solution, info), where info is a convergence flag.
    cgit = [0]

    def cb(xk):
        cgit[0] += 1

    vx, info = cg(lop, b.ravel(), x0=isn, tol=tol, maxiter=mit, callback=cb)
    return vx.reshape(b.shape), cgit[0]
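The operator above is never materialized; CG only needs the matvec rho*x + sum_k a_k (a_k^H x). A minimal single-block sketch of that structure using plain numpy and scipy, without the inner helper; the dimensions, rho, and data are illustrative:

import numpy as np
from scipy.sparse.linalg import LinearOperator, cg

rng = np.random.RandomState(0)
M, K = 64, 4
A = rng.randn(M, K)                      # columns a_0 ... a_{K-1}
rho = 0.5
b = rng.randn(M)

# Matvec for (rho*I + sum_k a_k a_k^H) x, formed without building the MxM matrix
matvec = lambda x: rho * x + A.dot(A.T.dot(x))
lop = LinearOperator((M, M), matvec=matvec, dtype=np.float64)

x, info = cg(lop, b, tol=1e-5, maxiter=1000)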
def solve(A, b, method, tol=1e-3):
    """ General sparse solver interface.

    method can be one of
    - spsolve_umfpack_mmd_ata
    - spsolve_umfpack_colamd
    - spsolve_superlu_mmd_ata
    - spsolve_superlu_colamd
    - bicg
    - bicgstab
    - cg
    - cgs
    - gmres
    - lgmres
    - minres
    - qmr
    - lsqr
    - lsmr
    """
    if method == 'spsolve_umfpack_mmd_ata':
        return spla.spsolve(A, b, use_umfpack=True, permc_spec='MMD_ATA')
    elif method == 'spsolve_umfpack_colamd':
        return spla.spsolve(A, b, use_umfpack=True, permc_spec='COLAMD')
    elif method == 'spsolve_superlu_mmd_ata':
        return spla.spsolve(A, b, use_umfpack=False, permc_spec='MMD_ATA')
    elif method == 'spsolve_superlu_colamd':
        return spla.spsolve(A, b, use_umfpack=False, permc_spec='COLAMD')
    elif method == 'bicg':
        res = spla.bicg(A, b, tol=tol)
        return res[0]
    elif method == 'bicgstab':
        res = spla.bicgstab(A, b, tol=tol)
        return res[0]
    elif method == 'cg':
        res = spla.cg(A, b, tol=tol)
        return res[0]
    elif method == 'cgs':
        res = spla.cgs(A, b, tol=tol)
        return res[0]
    elif method == 'gmres':
        res = spla.gmres(A, b, tol=tol)
        return res[0]
    elif method == 'lgmres':
        res = spla.lgmres(A, b, tol=tol)
        return res[0]
    elif method == 'minres':
        res = spla.minres(A, b, tol=tol)
        return res[0]
    elif method == 'qmr':
        res = spla.qmr(A, b, tol=tol)
        return res[0]
    elif method == 'lsqr':
        res = spla.lsqr(A, b, atol=tol, btol=tol)
        return res[0]
    elif method == 'lsmr':
        res = spla.lsmr(A, b, atol=tol, btol=tol)
        return res[0]
    else:
        raise Exception('UnknownSolverType')
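A minimal usage sketch of this dispatcher on a small SPD system; the tridiagonal matrix is illustrative, and spla is assumed to be scipy.sparse.linalg as in the snippet above:

import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as spla

# Small SPD test system (illustrative): 1-D Laplacian
n = 100
T = sp.diags([-1.0, 2.0, -1.0], [-1, 0, 1], shape=(n, n), format='csc')
b = np.ones(n)

x_cg = solve(T, b, 'cg', tol=1e-6)                 # iterative, symmetric positive definite
x_gmres = solve(T, b, 'gmres', tol=1e-6)           # iterative, general
x_direct = solve(T, b, 'spsolve_superlu_colamd')   # direct factorization
print(np.linalg.norm(T.dot(x_cg) - b))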
def update_W(m_opts, m_vars):
    # print "Updating W"
    if not m_opts['use_grad']:
        # Running averages of the Gram matrix and the right-hand side
        sigma = m_vars['X_batch_T'].dot(m_vars['X_batch']) + m_opts['lam_w'] * ssp.eye(m_vars['n_features'], format="csr")
        m_vars['sigma_W'] = (1 - m_vars['gamma']) * m_vars['sigma_W'] + m_vars['gamma'] * sigma
        x = m_vars['X_batch'].T.dot(m_vars['U_batch'])
        m_vars['x_W'] = (1 - m_vars['gamma']) * m_vars['x_W'] + m_vars['gamma'] * x

    if m_opts['use_cg'] != True:  # Ridge regression on the W matrix with the closed-form solution
        if ssp.issparse(m_vars['sigma_W']):
            m_vars['sigma_W'] = m_vars['sigma_W'].todense()
        sigma = linalg.inv(m_vars['sigma_W'])  # O(N^3) time for N x N matrix inversion
        m_vars['W'] = np.asarray(sigma.dot(m_vars['x_W'])).T
    else:  # CG on the ridge loss to compute the W matrix
        if not m_opts['use_grad']:
            # assert m_vars['X_batch'].shape[0] == m_vars['U_batch'].shape[0]
            X = m_vars['sigma_W']
            for i in range(m_opts['n_components']):
                y = m_vars['x_W'][:, i]
                # Warm-start CG from the current row of W
                w, info = sp_linalg.cg(X, y, x0=m_vars['W'][i, :], maxiter=m_opts['cg_iters'])
                if info < 0:
                    print "WARNING: sp_linalg.cg info: illegal input or breakdown"
                m_vars['W'][i, :] = w.T
        else:
            ''' Solving X*W' = U by gradient descent '''
            # print "Using grad!"
            my_invert = lambda x: x if x < 1 else 1.0 / x
            l2_norm = lambda x: np.sqrt((x ** 2).sum())

            def clip_by_norm(x, clip_max):
                x_norm = l2_norm(x)
                if x_norm > clip_max:
                    # print "Clipped!", clip_max
                    x = clip_max * (x / x_norm)
                return x

            lr = m_opts['grad_alpha'] * (1.0 + np.arange(m_opts['cg_iters'] * 10)) ** (-0.9)  # (-0.75)
            try:
                W_old = m_vars['W'].copy()
                tail_norm, curr_norm = 1.0, 1.0
                for iter_idx in range(m_opts['cg_iters'] * 10):
                    grad = m_vars['X_batch_T'].dot(m_vars['X_batch'].dot(m_vars['W'].T) - m_vars['U_batch'])
                    grad = lr[iter_idx] * (grad.T + m_opts['lam_w'] * m_vars['W'])
                    tail_norm = 0.5 * curr_norm + (1 - 0.5) * tail_norm
                    curr_norm = l2_norm(grad)
                    if curr_norm < 1e-15:
                        return
                    elif iter_idx > 10 and my_invert(np.abs(tail_norm / curr_norm)) > 0.8:
                        # print "Halved!"
                        lr = lr / 2.0
                    m_vars['W'] = m_vars['W'] - clip_by_norm(grad, 1e0)  # Clip by norm
                Delta_W = l2_norm(m_vars['W'] - W_old)
            except FloatingPointError:
                print "FloatingPointError in:"
                print grad
                assert False
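The CG branch above is ridge regression solved one component at a time, warm-started from the previous W. A minimal sketch of that per-column pattern with made-up shapes and regularization:

import numpy as np
from scipy.sparse import linalg as sp_linalg

rng = np.random.RandomState(0)
n_samples, n_features, n_components = 500, 50, 8
X = rng.randn(n_samples, n_features)
U = rng.randn(n_samples, n_components)
lam_w = 0.1

# Normal equations for ridge: (X^T X + lam*I) W^T = X^T U
sigma = X.T.dot(X) + lam_w * np.eye(n_features)
rhs = X.T.dot(U)

W = np.zeros((n_components, n_features))
for i in range(n_components):
    # Warm-start each column solve from the current estimate of that row of W
    w, info = sp_linalg.cg(sigma, rhs[:, i], x0=W[i, :], maxiter=20)
    W[i, :] = w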
def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0):
    n_samples, n_features = X.shape
    X1 = sp_linalg.aslinearoperator(X)
    coefs = np.empty((y.shape[1], n_features))

    if n_features > n_samples:
        def create_mv(curr_alpha):
            def _mv(x):
                return X1.matvec(X1.rmatvec(x)) + curr_alpha * x
            return _mv
    else:
        def create_mv(curr_alpha):
            def _mv(x):
                return X1.rmatvec(X1.matvec(x)) + curr_alpha * x
            return _mv

    for i in range(y.shape[1]):
        y_column = y[:, i]
        mv = create_mv(alpha[i])
        if n_features > n_samples:
            # kernel ridge
            # w = X.T * inv(X X^t + alpha*Id) y
            C = sp_linalg.LinearOperator(
                (n_samples, n_samples), matvec=mv, dtype=X.dtype)
            coef, info = sp_linalg.cg(C, y_column, tol=tol)
            coefs[i] = X1.rmatvec(coef)
        else:
            # linear ridge
            # w = inv(X^t X + alpha*Id) * X.T y
            y_column = X1.rmatvec(y_column)
            C = sp_linalg.LinearOperator(
                (n_features, n_features), matvec=mv, dtype=X.dtype)
            coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter,
                                          tol=tol)
        if info < 0:
            raise ValueError("Failed with error code %d" % info)
        if max_iter is None and info > 0 and verbose:
            warnings.warn("sparse_cg did not converge after %d iterations." %
                          info)
    return coefs
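A minimal call sketch for this helper with illustrative data; it assumes sp_linalg is scipy.sparse.linalg and warnings is imported, as the snippet above requires, and note that alpha is expected as one value per target column:

import numpy as np
import warnings
from scipy.sparse import linalg as sp_linalg

rng = np.random.RandomState(0)
X = rng.randn(200, 30)                     # n_samples > n_features -> "linear ridge" branch
Y = rng.randn(200, 2)                      # two targets
alpha = np.array([1.0, 10.0])              # one regularization value per target

coefs = _solve_sparse_cg(X, Y, alpha, tol=1e-6)
print(coefs.shape)                         # (2, 30): one coefficient vector per target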