import numpy as np
from scipy.special import expit
from sklearn.utils.extmath import log_logistic, safe_sparse_dot, squared_norm


def temp_log_loss(w, X, Y, alpha):
    n_classes = Y.shape[1]
    # w is packed as (n_classes, n_features + 1); the last column is the intercept.
    w = w.reshape(n_classes, -1)
    intercept = w[:, -1]
    w = w[:, :-1]
    z = safe_sparse_dot(X, w.T) + intercept
    # Per-sample normalizer: the sum of the per-class sigmoids.
    denom = expit(z)
    denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
    p = log_logistic(z)
    loss = - (Y * p).sum()
    loss += np.log(denom).sum()
    loss += 0.5 * alpha * squared_norm(w)
    return loss
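For reference, a minimal call sketch. The shapes are assumptions inferred from the reshape above (w packs one row per class plus an intercept column, Y holds per-sample class probabilities); the demo names are illustrative only.

# Hypothetical smoke test for temp_log_loss; shapes inferred from the code above.
rng = np.random.RandomState(0)
n_samples, n_features, n_classes = 20, 5, 3
X_demo = rng.randn(n_samples, n_features)
Y_demo = rng.rand(n_samples, n_classes)
Y_demo /= Y_demo.sum(axis=1, keepdims=True)        # rows sum to 1
w_demo = rng.randn(n_classes * (n_features + 1))   # weights + intercept, flattened
print(temp_log_loss(w_demo, X_demo, Y_demo, alpha=1.0))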
def logistic_loss(w, X, Y, alpha):
    """
    Implementation of the logistic loss function when Y is a probability
    distribution.

    loss = -SUM_i SUM_k y_ik * log(P[yi == k]) + 0.5 * alpha * ||w||^2
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    intercept = 0
    if n_classes > 2:
        fit_intercept = w.size == (n_classes * (n_features + 1))
        w = w.reshape(n_classes, -1)
        if fit_intercept:
            intercept = w[:, -1]
            w = w[:, :-1]
    else:
        fit_intercept = w.size == (n_features + 1)
        if fit_intercept:
            intercept = w[-1]
            w = w[:-1]
    z = safe_sparse_dot(X, w.T) + intercept
    if n_classes == 2:
        # In the binary case, simply compute the logistic function.
        p = np.vstack([log_logistic(-z), log_logistic(z)]).T
    else:
        # Compute the logistic function for each class and normalize.
        denom = expit(z)
        denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
        p = log_logistic(z)
        loss = - (Y * p).sum()
        loss += np.log(denom).sum()  # Y.sum() = 1
        loss += 0.5 * alpha * squared_norm(w)
        return loss
    loss = - (Y * p).sum() + 0.5 * alpha * squared_norm(w)
    return loss
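A hedged usage sketch for the binary branch: w is expected to hold n_features weights plus one trailing intercept, and each row of Y is a probability distribution over the two classes. The demo names are illustrative only.

# Illustrative only: binary call of logistic_loss.
rng = np.random.RandomState(1)
Xb = rng.randn(30, 4)
Yb = np.zeros((30, 2))
Yb[np.arange(30), rng.randint(2, size=30)] = 1.0   # one-hot rows, a valid distribution
wb = rng.randn(4 + 1)                              # 4 weights + 1 intercept
print(logistic_loss(wb, Xb, Yb, alpha=0.1))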
from numpy.testing import assert_almost_equal


def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100   # check numerical stability with large values
    X += 200
    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
import math


def norm(x):
    """Dot product-based Euclidean norm implementation.

    See: http://fseoane.net/blog/2011/computing-the-vector-norm/
    """
    return math.sqrt(squared_norm(x))
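A quick sanity check of this helper against NumPy (illustrative): squared_norm computes the dot product of the raveled array with itself, so its square root matches np.linalg.norm for a vector.

# Illustrative check of the dot-product based norm.
v = np.random.RandomState(7).randn(1000)
assert abs(norm(v) - np.linalg.norm(v)) < 1e-8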
import logging

import dask.array as da
from dask import compute
from sklearn.metrics import pairwise_distances_argmin_min

# `k_init`, `_centers_dense` and `tic` are helpers defined elsewhere in the
# same module (dask_ml.cluster.k_means).
logger = logging.getLogger(__name__)


def _kmeans_single_lloyd(X, n_clusters, max_iter=300, init='k-means||',
                         verbose=False, x_squared_norms=None,
                         random_state=None, tol=1e-4,
                         precompute_distances=True,
                         oversampling_factor=2,
                         init_max_iter=None):
    centers = k_init(X, n_clusters, init=init,
                     oversampling_factor=oversampling_factor,
                     random_state=random_state, max_iter=init_max_iter)
    dt = X.dtype
    P = X.shape[1]
    for i in range(max_iter):
        t0 = tic()
        # E-step: assign each sample to its nearest center.
        labels, distances = pairwise_distances_argmin_min(
            X, centers, metric='euclidean', metric_kwargs={"squared": True}
        )
        labels = labels.astype(np.int32)
        # distances is always float64; it must match X.dtype for
        # _centers_dense but remain float64 for the inertia.
        r = da.atop(_centers_dense, 'ij',
                    X, 'ij',
                    labels, 'i',
                    n_clusters, None,
                    distances.astype(X.dtype), 'i',
                    adjust_chunks={"i": n_clusters, "j": P},
                    dtype=X.dtype)
        # M-step: sum the per-chunk center contributions, then average.
        new_centers = da.from_delayed(
            sum(r.to_delayed().flatten()),
            (n_clusters, P),
            X.dtype
        )
        counts = da.bincount(labels, minlength=n_clusters)
        # Require at least one point per bucket, to avoid division by 0.
        counts = da.maximum(counts, 1)
        new_centers = new_centers / counts[:, None]
        new_centers, = compute(new_centers)

        # Convergence check: squared Frobenius norm of the center movement.
        shift = squared_norm(centers - new_centers)
        t1 = tic()
        logger.info("Lloyd loop %2d. Shift: %0.4f [%.2f s]", i, shift, t1 - t0)
        if shift < tol:
            break
        centers = new_centers

    if shift > 1e-7:
        # Re-assign labels so they match the final centers.
        labels, distances = pairwise_distances_argmin_min(X, centers)
        labels = labels.astype(np.int32)

    inertia = distances.sum()
    centers = centers.astype(dt)
    return labels, inertia, centers, i + 1
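The convergence test above is the one place this example uses squared_norm: the Lloyd loop stops once the squared Frobenius norm of the center movement drops below tol. A standalone sketch of that check with toy arrays (no dask involved, values are made up):

# Standalone sketch of the convergence check used above (toy data).
old_centers = np.array([[0.0, 0.0], [1.0, 1.0]])
new_centers = np.array([[0.01, 0.0], [1.0, 0.99]])
shift = squared_norm(old_centers - new_centers)   # sum of squared element-wise differences
converged = shift < 1e-4
print(shift, converged)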