# Imports shared by the snippets collected on this page (each snippet comes
# from a different project). _intercept_dot, used further down, is sklearn's
# private helper from sklearn.linear_model (its exact module path varies by
# version); a sketch of it appears below.
import numpy as np
from numpy.testing import assert_array_almost_equal
from scipy.special import expit
from sklearn.utils import check_array
from sklearn.utils.extmath import safe_sparse_dot, log_logistic, squared_norm
from sklearn.utils.validation import check_is_fitted


def temp_log_loss(w, X, Y, alpha):
    # Multinomial logistic loss; w is expected to include the per-class
    # intercepts as its last column (cf. logistic_loss below).
    n_classes = Y.shape[1]
    w = w.reshape(n_classes, -1)
    intercept = w[:, -1]
    w = w[:, :-1]
    z = safe_sparse_dot(X, w.T) + intercept

    # normalization factor for the per-class probabilities
    denom = expit(z)
    denom = denom.sum(axis=1).reshape((denom.shape[0], -1))

    p = log_logistic(z)
    loss = - (Y * p).sum()
    loss += np.log(denom).sum()
    loss += 0.5 * alpha * squared_norm(w)
    return loss
def fgrad(we, X, y, l1, l2):
    # Loss and gradient for L1/L2-regularized logistic regression with the
    # weight vector split as w = w_plus - w_minus (both non-negative), so the
    # L1 penalty becomes a plain sum over the extended parameters we[1:].
    nsamples, nfactors = X.shape
    w0 = we[0]
    w = we[1:(nfactors + 1)] - we[(nfactors + 1):]
    yz = y * (safe_sparse_dot(X, w) + w0)
    f = -np.sum(log_logistic(yz)) + l1 * np.sum(we[1:]) + 0.5 * l2 * np.dot(w, w)
    e = (expit(yz) - 1) * y
    g = safe_sparse_dot(X.T, e) + l2 * w
    g0 = np.sum(e)
    grad = np.concatenate([g, -g]) + l1
    grad = np.insert(grad, 0, g0)
    return f, grad
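A minimal usage sketch (not from the original source, data and names are illustrative): fgrad expects the parameter vector laid out as [intercept, w_plus, w_minus], so it can be handed to a bound-constrained solver that keeps the two split halves non-negative while leaving the intercept free.

from scipy.optimize import minimize

rng = np.random.RandomState(0)
X_demo = rng.randn(20, 3)
y_demo = np.where(rng.randn(20) > 0, 1.0, -1.0)   # labels in {-1, +1}
we0 = np.zeros(1 + 2 * X_demo.shape[1])           # [w0, w_plus, w_minus]
bounds = [(None, None)] + [(0, None)] * (2 * X_demo.shape[1])

res = minimize(fgrad, we0, args=(X_demo, y_demo, 0.1, 0.1),
               jac=True, method="L-BFGS-B", bounds=bounds)
w_fitted = res.x[1:1 + X_demo.shape[1]] - res.x[1 + X_demo.shape[1]:]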
def logistic_loss(w, X, Y, alpha):
    """
    Implementation of the logistic loss function when Y is a probability
    distribution.

    loss = -SUM_i SUM_k y_ik * log(P[yi == k]) + alpha * ||w||^2
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    intercept = 0

    if n_classes > 2:
        fit_intercept = w.size == (n_classes * (n_features + 1))
        w = w.reshape(n_classes, -1)
        if fit_intercept:
            intercept = w[:, -1]
            w = w[:, :-1]
    else:
        fit_intercept = w.size == (n_features + 1)
        if fit_intercept:
            intercept = w[-1]
            w = w[:-1]

    z = safe_sparse_dot(X, w.T) + intercept

    if n_classes == 2:
        # in the binary case, simply compute the logistic function
        p = np.vstack([log_logistic(-z), log_logistic(z)]).T
    else:
        # compute the logistic function for each class and normalize
        denom = expit(z)
        denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
        p = log_logistic(z)
        loss = - (Y * p).sum()
        loss += np.log(denom).sum()  # Y.sum() = 1
        loss += 0.5 * alpha * squared_norm(w)
        return loss

    loss = - (Y * p).sum() + 0.5 * alpha * squared_norm(w)
    return loss
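An illustrative call (synthetic data, hypothetical names): each row of Y is a probability distribution over the classes, and w is the flat coefficient vector with the per-class intercepts appended as the last column.

rng = np.random.RandomState(0)
X_demo = rng.randn(10, 4)
Y_demo = rng.dirichlet(np.ones(3), size=10)   # soft labels, each row sums to 1
w_demo = np.zeros(3 * (4 + 1))                # 3 classes, 4 features + intercept
print(logistic_loss(w_demo, X_demo, Y_demo, alpha=1.0))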
def _logistic_loss(w, X, y, alpha, sample_weight=None):
    """Computes the logistic loss.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : array-like, shape (n_samples,), optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.
    """
    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
    return out
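_intercept_dot is not defined on this page; it is sklearn's private helper that splits the optional intercept off w and returns the margins y * (X·w + c). A rough sketch of its behavior, for reference only (simplified, not sklearn's actual source):

def _intercept_dot(w, X, y):
    # Split the (optional) intercept off the coefficient vector and return
    # (w_without_intercept, intercept, y * (X @ w + intercept)).
    c = 0.
    if w.size == X.shape[1] + 1:
        c = w[-1]
        w = w[:-1]
    z = safe_sparse_dot(X, w) + c
    return w, c, y * z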
def test_logistic_sigmoid():
    # Check correctness and robustness of logistic sigmoid implementation
    def naive_log_logistic(x):
        return np.log(1 / (1 + np.exp(-x)))

    x = np.linspace(-2, 2, 50)
    assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))

    extreme_x = np.array([-100., 100.])
    assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
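For reference (not part of the test): log_logistic(x) equals -log(1 + exp(-x)), which NumPy can also evaluate stably as -np.logaddexp(0, -x); the naive formula collapses to log(0) = -inf once exp(-x) overflows, e.g. at x = -1000.

x_extreme = np.array([-1000., 1000.])
print(np.log(1 / (1 + np.exp(-x_extreme))))   # [-inf, 0.]   naive form fails (with overflow warnings)
print(-np.logaddexp(0, -x_extreme))           # [-1000., 0.] stable equivalent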
def score_samples(self, X):
    """Compute the pseudo-likelihood of X.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape (n_samples, n_features)
        Values of the visible layer. Must be all-boolean (not checked).

    Returns
    -------
    pseudo_likelihood : array-like, shape (n_samples,)
        Value of the pseudo-likelihood (proxy for likelihood).

    Notes
    -----
    This method is not deterministic: it computes a quantity called the
    free energy on X, then on a randomly corrupted version of X, and
    returns the log of the logistic function of the difference.
    """
    check_is_fitted(self, "components_")

    v = check_array(X, accept_sparse='csr')
    fe = self._free_energy(v)

    # corrupt() works on v in place and returns the state needed to undo the
    # corruption, so the corrupted free energy is computed from v itself and
    # uncorrupt() restores it afterwards.
    v_, state = self.corrupt(v)
    # TODO: If I wanted to be really fancy here, I would do one of those
    # "with..." things.
    fe_corrupted = self._free_energy(v)
    self.uncorrupt(v, state)

    # See https://en.wikipedia.org/wiki/Pseudolikelihood
    # Let x be some visible vector, x_i its ith entry, x_-i the vector without
    # that entry, and x_iflipped x with the ith bit flipped. F() is free energy.
    #   P(x_i | x_-i) = P(x) / P(x_-i) = P(x) / (P(x) + P(x_iflipped))
    # Expand the definition of P(x), cancel the partition function on each
    # term, and divide top and bottom by e^{-F(x)} to get
    #   1 / (1 + e^{F(x) - F(x_iflipped)})
    # So we're just calculating the log of that. We multiply by the number of
    # visible units because we approximate P(x) as the product of the
    # conditional likelihoods of the individual units, but only test one
    # randomly chosen unit and let it stand in for the average.
    if hasattr(self, 'codec'):
        normalizer = self.codec.shape()[0]
    else:
        normalizer = v.shape[1]
    return normalizer * log_logistic(fe_corrupted - fe)
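The returned expression works because 1 / (1 + e^{F(x) - F(x_iflipped)}) is exactly expit(F(x_iflipped) - F(x)), so its log is log_logistic(fe_corrupted - fe). A quick numeric check of that identity (arbitrary illustrative values, standalone):

fe_x, fe_flipped = 3.2, 1.7   # made-up free-energy values
lhs = np.log(1.0 / (1.0 + np.exp(fe_x - fe_flipped)))
rhs = log_logistic(np.array([fe_flipped - fe_x]))[0]
assert np.isclose(lhs, rhs)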
# TODO: No longer used
def logistic_grad(w, X, Y, alpha):
    """
    Implementation of the logistic loss gradient when Y is a multi-ary
    probability distribution.
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    fit_intercept = w.size == (n_classes * (n_features + 1))
    grad = np.zeros((n_classes, n_features + int(fit_intercept)))
    w = w.reshape(n_classes, -1)
    if fit_intercept:
        intercept = w[:, -1]
        w = w[:, :-1]
    else:
        intercept = 0
    z = safe_sparse_dot(X, w.T) + intercept

    # normalization factor
    denom = expit(z)
    denom = denom.sum(axis=1).reshape((denom.shape[0], -1))

    # d/dwj log(denom)
    #   = 1/denom * d/dwj expit(wj * x + b)
    #   = 1/denom * expit(wj * x + b) * expit(-(wj * x + b)) * x
    #
    # d/dwj -Y * log_logistic(z)
    #   = -Y * expit(-(wj * x + b)) * x
    z0 = (np.reciprocal(denom) * expit(z) - Y) * expit(-z)

    grad[:, :n_features] = safe_sparse_dot(z0.T, X)
    grad[:, :n_features] += alpha * w
    if fit_intercept:
        grad[:, -1] = z0.sum(axis=0)
    return grad.ravel()
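A finite-difference sanity check (synthetic data, illustrative only): since logistic_grad is meant to be the gradient of the multi-class branch of logistic_loss above, scipy's check_grad should report an error close to zero on the same inputs.

from scipy.optimize import check_grad

rng = np.random.RandomState(0)
X_demo = rng.randn(15, 4)
Y_demo = rng.dirichlet(np.ones(3), size=15)
w_demo = rng.randn(3 * (4 + 1)) * 0.1

err = check_grad(logistic_loss, logistic_grad, w_demo, X_demo, Y_demo, 1.0)
print(err)   # should be close to zero (finite-difference noise only)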
def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None):
    """Computes the logistic loss and gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : array-like, shape (n_samples,), optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    _, n_features = X.shape
    grad = np.empty_like(w)

    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)

    z = expit(yz)
    z0 = sample_weight * (z - 1) * y
    grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w

    # Case where we fit the intercept.
    if grad.shape[0] > n_features:
        grad[-1] = z0.sum()
    return out, grad
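A minimal optimization sketch (synthetic data, hypothetical variable names): because the function returns both the loss and its gradient, it can be passed straight to a solver with jac=True; labels are expected in {-1, +1}.

from scipy.optimize import minimize

rng = np.random.RandomState(0)
X_demo = rng.randn(50, 5)
y_demo = np.where(X_demo[:, 0] + 0.1 * rng.randn(50) > 0, 1.0, -1.0)
w0 = np.zeros(X_demo.shape[1] + 1)    # last entry is the intercept

res = minimize(_logistic_loss_and_grad, w0, args=(X_demo, y_demo, 1.0),
               jac=True, method="L-BFGS-B")
coef, intercept = res.x[:-1], res.x[-1]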