def _fit_regression(self, y):
"""
fit regression using pseudo-inverse
or supplied regressor
"""
if (self.regressor is None):
self.coefs_ = safe_sparse_dot(pinv2(self.hidden_activations_), y)
else:
self.regressor.fit(self.hidden_activations_, y)
self.fitted_ = True
def _get_predictions(self):
"""get predictions using internal least squares/supplied regressor"""
if (self.regressor is None):
preds = safe_sparse_dot(self.hidden_activations_, self.coefs_)
else:
preds = self.regressor.predict(self.hidden_activations_)
return preds
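# Illustrative sketch (not part of the original class): the two methods above
# implement an extreme-learning-machine style readout, where the output weights
# are solved in closed form with a pseudo-inverse when no regressor is supplied.
# Names and shapes below are invented; recent SciPy deprecates pinv2, so plain
# pinv is used here for the same purpose.
import numpy as np
from scipy.linalg import pinv
from sklearn.utils.extmath import safe_sparse_dot

rng = np.random.RandomState(0)
H = rng.rand(100, 20)   # hidden-layer activations (n_samples x n_hidden)
y = rng.rand(100, 3)    # regression targets (n_samples x n_outputs)

coefs = safe_sparse_dot(pinv(H), y)   # closed-form least-squares readout
preds = safe_sparse_dot(H, coefs)     # predictions are a single matrix product
print(preds.shape)                    # (100, 3)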
def getWordTypeCooccurPieces(self, dtype=np.float32):
""" Calculate building blocks for word-word cooccur calculation
These pieces can be used for incremental construction.
Returns
-------
Q : 2D matrix, W x W (where W is vocab_size)
sameWordVec : 1D array, size W
nDoc : scalar
"""
sameWordVec = np.zeros(self.vocab_size)
data = np.zeros(self.word_count.shape, dtype=dtype)
for docID in xrange(self.nDoc):
start = self.doc_range[docID]
stop = self.doc_range[docID + 1]
N = self.word_count[start:stop].sum()
NNm1 = N * (N - 1)
sameWordVec[self.word_id[start:stop]] += \
self.word_count[start:stop] / NNm1
data[start:stop] = self.word_count[start:stop] / np.sqrt(NNm1)
# Now, create a sparse matrix that's D x V
sparseDocWordMat = scipy.sparse.csr_matrix(
(data, self.word_id, self.doc_range),
shape=(self.nDoc, self.vocab_size),
dtype=dtype)
# Q : V x V
from sklearn.utils.extmath import safe_sparse_dot
Q = safe_sparse_dot(
sparseDocWordMat.T, sparseDocWordMat, dense_output=1)
return Q, sameWordVec, self.nDoc
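# Illustrative sketch (invented toy data): each document contributes its word
# counts, rescaled by sqrt(N * (N - 1)), to a sparse doc-by-vocab matrix, and Q
# is simply the V x V Gram matrix of that matrix.
import numpy as np
import scipy.sparse
from sklearn.utils.extmath import safe_sparse_dot

word_id = np.array([0, 2, 3, 1, 2])            # word type of each token slot
word_count = np.array([2., 1., 1., 3., 1.])    # counts per slot
doc_range = np.array([0, 3, 5])                # document boundaries (2 docs)
vocab_size, nDoc = 4, 2

data = np.zeros_like(word_count)
for d in range(nDoc):
    start, stop = doc_range[d], doc_range[d + 1]
    N = word_count[start:stop].sum()
    data[start:stop] = word_count[start:stop] / np.sqrt(N * (N - 1))

# D x V sparse doc-word matrix, then Q = D^T D  (V x V)
D = scipy.sparse.csr_matrix((data, word_id, doc_range),
                            shape=(nDoc, vocab_size))
Q = safe_sparse_dot(D.T, D, dense_output=True)
print(Q.shape)   # (4, 4)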
def test_svc_with_custom_kernel():
kfunc = lambda x, y: safe_sparse_dot(x, y.T)
clf_lin = svm.SVC(kernel='linear').fit(X_sp, Y)
clf_mylin = svm.SVC(kernel=kfunc).fit(X_sp, Y)
assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp))
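# Illustrative, self-contained version of the test above, with toy data in
# place of the fixtures X_sp and Y: a callable kernel built from
# safe_sparse_dot should match the built-in 'linear' kernel on sparse input.
import numpy as np
import scipy.sparse as sp
from sklearn import svm
from sklearn.utils.extmath import safe_sparse_dot

rng = np.random.RandomState(0)
X = sp.csr_matrix(rng.rand(30, 5))
y = np.arange(30) % 2

clf_builtin = svm.SVC(kernel='linear').fit(X, y)
clf_custom = svm.SVC(kernel=lambda a, b: safe_sparse_dot(a, b.T)).fit(X, y)
print(np.array_equal(clf_builtin.predict(X), clf_custom.predict(X)))  # True, as asserted above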
def predict(self, X):
    # First entry of self.we is the bias; the effective coefficients are the
    # difference of the two remaining halves. Integer division keeps the
    # slice bounds integral under Python 3.
    n = (len(self.we) - 1) // 2
    w0 = self.we[0]
    w = self.we[1:n + 1] - self.we[n + 1:]
    return expit(w0 + safe_sparse_dot(X, w))
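# Illustrative sketch with invented shapes: the weight vector holds the bias
# first, then two halves whose difference gives the effective coefficients
# (an encoding often used when the optimizer works with non-negative
# variables), so the prediction is a plain logistic-regression score.
import numpy as np
from scipy.special import expit
from sklearn.utils.extmath import safe_sparse_dot

rng = np.random.RandomState(0)
X = rng.rand(10, 4)
we = rng.rand(1 + 2 * 4)          # [bias, first half, second half]

n = (len(we) - 1) // 2
w0 = we[0]
w = we[1:n + 1] - we[n + 1:]      # effective coefficients

probs = expit(w0 + safe_sparse_dot(X, w))
print(probs.shape)                # (10,)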
def reduce(self, Y):
return safe_sparse_dot(self.E, Y)
def reduce(self, Y):
return safe_sparse_dot(self.R, Y)
def reduce(self, Y):
return safe_sparse_dot(self.W1, Y) * safe_sparse_dot(self.W2, Y) / np.sqrt(self.k)
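# Illustrative sketch (matrices invented): all three reduce() variants project
# Y through fixed matrices with safe_sparse_dot; the last one combines two
# projections element-wise with a 1/sqrt(k) scaling, reminiscent of random
# feature maps for degree-2 polynomial kernels.
import numpy as np
from sklearn.utils.extmath import safe_sparse_dot

rng = np.random.RandomState(0)
d, k = 50, 10
Y = rng.rand(d, 5)               # columns are the vectors being reduced
R = rng.randn(k, d)              # single random projection
W1, W2 = rng.randn(k, d), rng.randn(k, d)

reduced_linear = safe_sparse_dot(R, Y)                                           # (k, 5)
reduced_pairwise = safe_sparse_dot(W1, Y) * safe_sparse_dot(W2, Y) / np.sqrt(k)  # (k, 5)
print(reduced_linear.shape, reduced_pairwise.shape)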
def score(self, user, candidates, context):
# i_mat is (n_item_context, n_item) for all possible items
# extract only target items
i_mat = self.i_mat[:, candidates]
n_target = len(candidates)
u_vec = user.encode(dim=self.n_user,
index=self.use_index,
feature=True,
vertical=True)
u_vec = np.concatenate((u_vec, np.array([context]).T))
u_mat = sp.csr_matrix(np.repeat(u_vec, n_target, axis=1))
mat = sp.vstack((u_mat, i_mat))
# Matrices A and B should be dense (numpy arrays rather than scipy CSR matrices) because V is dense.
V = sp.csr_matrix(self.V)
A = safe_sparse_dot(V.T, mat)
A.data[:] = A.data ** 2
sq_mat = mat.copy()
sq_mat.data[:] = sq_mat.data ** 2
sq_V = V.copy()
sq_V.data[:] = sq_V.data ** 2
B = safe_sparse_dot(sq_V.T, sq_mat)
interaction = (A - B).sum(axis=0)
interaction /= 2. # (1, n_item); numpy matrix form
pred = self.w0 + safe_sparse_dot(self.w, mat, dense_output=True) + interaction
return np.abs(1. - np.ravel(pred))
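# Illustrative sketch of the interaction term used in score() above: the
# factorization-machine identity rewrites the pairwise sum over features as
# 0.5 * sum_f ((x V)_f^2 - (x^2)(V^2)_f), avoiding the explicit double loop.
# Names and sizes below are invented.
import numpy as np
from sklearn.utils.extmath import safe_sparse_dot

rng = np.random.RandomState(0)
d, k = 6, 3
x = rng.rand(d)          # one (dense) feature vector
V = rng.rand(d, k)       # FM factor matrix

# Naive O(d^2) pairwise interaction
naive = sum(np.dot(V[i], V[j]) * x[i] * x[j]
            for i in range(d) for j in range(i + 1, d))

# FM trick used above: A = (x V)^2, B = (x^2)(V^2)
A = safe_sparse_dot(x, V) ** 2
B = safe_sparse_dot(x ** 2, V ** 2)
fast = 0.5 * (A - B).sum()
print(np.allclose(naive, fast))   # True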
def _get_potentials(self, x, w):
# check sizes?
n_node_coefs = self.n_prop_states * self.n_prop_features
n_link_coefs = self.n_link_states * self.n_link_features
n_compat_coefs = self.n_prop_states ** 2 * self.n_link_states
if self.compat_features:
n_compat_coefs *= self.n_compat_features_
assert w.size == (n_node_coefs + n_link_coefs + n_compat_coefs +
self.n_second_order_features_ *
self.n_second_order_factors_)
w_node = w[:n_node_coefs]
w_node = w_node.reshape(self.n_prop_states, self.n_prop_features)
w_link = w[n_node_coefs:n_node_coefs + n_link_coefs]
w_link = w_link.reshape(self.n_link_states, self.n_link_features)
# for readability, consume w. This is not inplace, don't worry.
w = w[n_node_coefs + n_link_coefs:]
w_compat = w[:n_compat_coefs]
if self.compat_features:
w_compat = w_compat.reshape((self.n_compat_features_, -1))
w_compat = np.dot(x.X_compat, w_compat)
compat_potentials = w_compat.reshape((-1,
self.n_prop_states,
self.n_prop_states,
self.n_link_states))
else:
compat_potentials = w_compat.reshape(self.n_prop_states,
self.n_prop_states,
self.n_link_states)
w = w[n_compat_coefs:]
coparent_potentials = grandparent_potentials = sibling_potentials = []
if self.coparents:
w_coparent = w[:self.n_second_order_features_]
coparent_potentials = safe_sparse_dot(x.X_sec_ord, w_coparent)
w = w[self.n_second_order_features_:]
if self.grandparents:
w_grandparent = w[:self.n_second_order_features_]
grandparent_potentials = safe_sparse_dot(x.X_sec_ord,
w_grandparent)
w = w[self.n_second_order_features_:]
if self.siblings:
w_sibling = w[:self.n_second_order_features_]
sibling_potentials = safe_sparse_dot(x.X_sec_ord, w_sibling)
prop_potentials = safe_sparse_dot(x.X_prop, w_node.T)
link_potentials = safe_sparse_dot(x.X_link, w_link.T)
return (prop_potentials, link_potentials, compat_potentials,
coparent_potentials, grandparent_potentials,
sibling_potentials)
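# Illustrative sketch (shapes invented) of the slicing pattern used above: the
# flat parameter vector w is consumed block by block, each block reshaped into
# a weight matrix, and potentials are obtained as feature/weight products.
import numpy as np
from sklearn.utils.extmath import safe_sparse_dot

n_states, n_feats = 3, 4
w = np.arange(n_states * n_feats + n_feats, dtype=float)    # flat parameters

w_node = w[:n_states * n_feats].reshape(n_states, n_feats)  # first block
w = w[n_states * n_feats:]                                  # "consume" it
w_extra = w[:n_feats]                                       # next block

X = np.random.RandomState(0).rand(10, n_feats)
node_potentials = safe_sparse_dot(X, w_node.T)   # (10, n_states)
extra_potentials = safe_sparse_dot(X, w_extra)   # (10,)
print(node_potentials.shape, extra_potentials.shape)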
def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None):
"""Computes the logistic loss and gradient.
Parameters
----------
w : ndarray, shape (n_features,) or (n_features + 1,)
Coefficient vector.
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data.
y : ndarray, shape (n_samples,)
Array of labels.
alpha : float
Regularization parameter. alpha is equal to 1 / C.
sample_weight : array-like, shape (n_samples,) optional
Array of weights that are assigned to individual samples.
If not provided, then each sample is given unit weight.
Returns
-------
out : float
Logistic loss.
grad : ndarray, shape (n_features,) or (n_features + 1,)
Logistic gradient.
"""
_, n_features = X.shape
grad = np.empty_like(w)
w, c, yz = _intercept_dot(w, X, y)
if sample_weight is None:
sample_weight = np.ones(y.shape[0])
# Logistic loss is the negative of the log of the logistic function.
out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
z = expit(yz)
z0 = sample_weight * (z - 1) * y
grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w
# Case where we fit the intercept.
if grad.shape[0] > n_features:
grad[-1] = z0.sum()
return out, grad
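# Illustrative, self-contained check of the formulas above (the helpers
# _intercept_dot and log_logistic are scikit-learn internals, so this sketch
# re-derives the no-intercept case with expit/logaddexp and verifies the
# analytic gradient against finite differences).
import numpy as np
from scipy.special import expit
from scipy.optimize import check_grad
from sklearn.utils.extmath import safe_sparse_dot

def logistic_loss(w, X, y, alpha):
    yz = y * safe_sparse_dot(X, w)
    # -log(sigmoid(yz)) == logaddexp(0, -yz), computed stably
    return np.sum(np.logaddexp(0, -yz)) + .5 * alpha * np.dot(w, w)

def logistic_grad(w, X, y, alpha):
    z0 = (expit(y * safe_sparse_dot(X, w)) - 1) * y
    return safe_sparse_dot(X.T, z0) + alpha * w

rng = np.random.RandomState(0)
X = rng.rand(40, 5)
y = rng.choice([-1., 1.], size=40)
w = rng.rand(5)
print(check_grad(logistic_loss, logistic_grad, w, X, y, 1.0))   # small (~1e-6)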
def _multinomial_loss(w, X, Y, alpha, sample_weight):
"""Computes multinomial loss and class probabilities.
Parameters
----------
w : ndarray, shape (n_classes * n_features,) or
(n_classes * (n_features + 1),)
Coefficient vector.
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data.
Y : ndarray, shape (n_samples, n_classes)
Transformed labels according to the output of LabelBinarizer.
alpha : float
Regularization parameter. alpha is equal to 1 / C.
sample_weight : array-like, shape (n_samples,) optional
Array of weights that are assigned to individual samples.
If not provided, then each sample is given unit weight.
Returns
-------
loss : float
Multinomial loss.
p : ndarray, shape (n_samples, n_classes)
Estimated class probabilities.
w : ndarray, shape (n_classes, n_features)
Reshaped param vector excluding intercept terms.
Reference
---------
Bishop, C. M. (2006). Pattern recognition and machine learning.
Springer. (Chapter 4.3.4)
"""
n_classes = Y.shape[1]
n_features = X.shape[1]
fit_intercept = w.size == (n_classes * (n_features + 1))
w = w.reshape(n_classes, -1)
alpha = alpha.reshape(n_classes, -1)
sample_weight = sample_weight[:, np.newaxis]
if fit_intercept:
intercept = w[:, -1]
w = w[:, :-1]
else:
intercept = 0
p = safe_sparse_dot(X, w.T)
p += intercept
p -= logsumexp(p, axis=1)[:, np.newaxis]
loss = -(sample_weight * Y * p).sum()
loss += 0.5 * ((alpha + L2_REG) * w * w).sum()
p = np.exp(p, p)
return loss, p, w
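# Illustrative sketch: the probability computation above normalizes in log
# space (subtract logsumexp per row, then exponentiate in place), which is the
# numerically stable form of the softmax. Toy scores below are invented.
import numpy as np
from scipy.special import logsumexp

rng = np.random.RandomState(0)
scores = rng.randn(5, 3) * 10    # raw class scores

log_p = scores - logsumexp(scores, axis=1)[:, np.newaxis]
p = np.exp(log_p)

naive = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
print(np.allclose(p, naive))     # True (the naive form can overflow for large scores)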
def _multinomial_loss_grad(w, X, Y, alpha, sample_weight):
"""Computes the multinomial loss, gradient and class probabilities.
Parameters
----------
w : ndarray, shape (n_classes * n_features,) or
(n_classes * (n_features + 1),)
Coefficient vector.
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data.
Y : ndarray, shape (n_samples, n_classes)
Transformed labels according to the output of LabelBinarizer.
alpha : float
Regularization parameter. alpha is equal to 1 / C.
sample_weight : array-like, shape (n_samples,) optional
Array of weights that are assigned to individual samples.
Returns
-------
loss : float
Multinomial loss.
grad : ndarray, shape (n_classes * n_features,) or
(n_classes * (n_features + 1),)
Ravelled gradient of the multinomial loss.
p : ndarray, shape (n_samples, n_classes)
Estimated class probabilities
Reference
---------
Bishop, C. M. (2006). Pattern recognition and machine learning.
Springer. (Chapter 4.3.4)
"""
n_classes = Y.shape[1]
n_features = X.shape[1]
fit_intercept = (w.size == n_classes * (n_features + 1))
grad = np.zeros((n_classes, n_features + bool(fit_intercept)))
alpha = alpha.reshape(n_classes, -1)
loss, p, w = _multinomial_loss(w, X, Y, alpha, sample_weight)
sample_weight = sample_weight[:, np.newaxis]
diff = sample_weight * (p - Y)
grad[:, :n_features] = safe_sparse_dot(diff.T, X)
grad[:, :n_features] += (alpha + L2_REG) * w
if fit_intercept:
grad[:, -1] = diff.sum(axis=0)
return loss, grad.ravel(), p
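# Illustrative, self-contained gradient check mirroring the two functions
# above, but without sample weights, intercepts, or the module-level L2_REG
# constant (all names below are invented): the multinomial gradient is
# (p - Y)^T X plus the ridge term.
import numpy as np
from scipy.special import logsumexp
from scipy.optimize import check_grad
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils.extmath import safe_sparse_dot

def multinomial_loss(w, X, Y, alpha):
    W = w.reshape(Y.shape[1], X.shape[1])
    p = safe_sparse_dot(X, W.T)
    p -= logsumexp(p, axis=1)[:, np.newaxis]          # log-probabilities
    return -(Y * p).sum() + .5 * alpha * (W * W).sum()

def multinomial_grad(w, X, Y, alpha):
    W = w.reshape(Y.shape[1], X.shape[1])
    p = safe_sparse_dot(X, W.T)
    p = np.exp(p - logsumexp(p, axis=1)[:, np.newaxis])
    return (safe_sparse_dot((p - Y).T, X) + alpha * W).ravel()

rng = np.random.RandomState(0)
X = rng.rand(30, 4)
Y = LabelBinarizer().fit_transform(np.arange(30) % 3)   # one-hot labels, 3 classes
w0 = rng.rand(3 * 4)
print(check_grad(multinomial_loss, multinomial_grad, w0, X, Y, 0.1))   # small (~1e-6)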